Compare commits: v0.1.13 ... bagatur/ve

6 commits

| Author | SHA1 | Date |
|---|---|---|
|  | e89ffb7c92 |  |
|  | c7f9d8b812 |  |
|  | b18685ff7b |  |
|  | 163bb5ee7e |  |
|  | 94256d01d3 |  |
|  | 2523e27330 |  |
.github/scripts/get_min_versions.py (22 changes)

````diff
@@ -4,12 +4,7 @@ import tomllib
-from packaging.version import parse as parse_version
+import re

-MIN_VERSION_LIBS = [
-    "langchain-core",
-    "langchain-community",
-    "langchain",
-    "langchain-text-splitters",
-]
+MIN_VERSION_LIBS = ["langchain-core", "langchain-community", "langchain", "langchain-text-splitters"]


 def get_min_version(version: str) -> str:
@@ -61,13 +56,12 @@ def get_min_version_from_toml(toml_path: str):
     return min_versions


 if __name__ == "__main__":
     # Get the TOML file path from the command line argument
     toml_file = sys.argv[1]

     # Call the function to get the minimum versions
     min_versions = get_min_version_from_toml(toml_file)

     print(
         " ".join([f"{lib}=={version}" for lib, version in min_versions.items()])
     )  # noqa: T201
````
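For context, a minimal sketch of what this script's output step does. The body of `get_min_version` is not shown in the diff, so the specifier handling below is an assumption for illustration only; the space-joined `lib==version` output format comes from the `print` call above:

```python
# Hypothetical sketch of get_min_versions.py behavior -- not the repo's exact code.
import re


def get_min_version(version: str) -> str:
    """Extract the minimum version from a specifier like '^0.1.7' or '>=0.1,<0.2'."""
    # Assumption: specifiers in pyproject.toml are caret or >= ranges.
    match = re.match(r"^[\^>=~]*([0-9]+(?:\.[0-9]+)*)", version)
    if match is None:
        raise ValueError(f"Unrecognized version specifier: {version}")
    return match.group(1)


min_versions = {"langchain-core": get_min_version("^0.1.7")}
# The script prints space-separated pins that the release workflow feeds to pip:
print(" ".join(f"{lib}=={version}" for lib, version in min_versions.items()))
# -> langchain-core==0.1.7
```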
.github/workflows/_integration_test.yml (1 change)

````diff
@@ -75,7 +75,6 @@ jobs:
           ES_API_KEY: ${{ secrets.ES_API_KEY }}
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
           MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
-          VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
         run: |
           make integration_tests
````
.github/workflows/_release.yml (37 changes)

````diff
@@ -157,24 +157,6 @@ jobs:
         run: make tests
         working-directory: ${{ inputs.working-directory }}

-      - name: Get minimum versions
-        working-directory: ${{ inputs.working-directory }}
-        id: min-version
-        run: |
-          poetry run pip install packaging
-          min_versions="$(poetry run python $GITHUB_WORKSPACE/.github/scripts/get_min_versions.py pyproject.toml)"
-          echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
-          echo "min-versions=$min_versions"
-
-      - name: Run unit tests with minimum dependency versions
-        if: ${{ steps.min-version.outputs.min-versions != '' }}
-        env:
-          MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
-        run: |
-          poetry run pip install $MIN_VERSIONS
-          make tests
-        working-directory: ${{ inputs.working-directory }}
-
       - name: 'Authenticate to Google Cloud'
         id: 'auth'
         uses: google-github-actions/auth@v2
@@ -214,10 +196,27 @@ jobs:
           ES_API_KEY: ${{ secrets.ES_API_KEY }}
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
           MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
           VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
         run: make integration_tests
         working-directory: ${{ inputs.working-directory }}

+      - name: Get minimum versions
+        working-directory: ${{ inputs.working-directory }}
+        id: min-version
+        run: |
+          poetry run pip install packaging
+          min_versions="$(poetry run python $GITHUB_WORKSPACE/.github/scripts/get_min_versions.py pyproject.toml)"
+          echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
+          echo "min-versions=$min_versions"
+
+      - name: Run unit tests with minimum dependency versions
+        if: ${{ steps.min-version.outputs.min-versions != '' }}
+        env:
+          MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
+        run: |
+          poetry run pip install $MIN_VERSIONS
+          make tests
+        working-directory: ${{ inputs.working-directory }}
+
   publish:
     needs:
       - build
````
.github/workflows/check-broken-links.yml (24 changes)

````diff
@@ -1,24 +0,0 @@
-name: Check Broken Links
-
-on:
-  workflow_dispatch:
-  schedule:
-    - cron: '0 13 * * *'
-
-jobs:
-  check-links:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - name: Use Node.js 18.x
-        uses: actions/setup-node@v3
-        with:
-          node-version: 18.x
-          cache: "yarn"
-          cache-dependency-path: ./docs/yarn.lock
-      - name: Install dependencies
-        run: yarn install --immutable --mode=skip-build
-        working-directory: ./docs
-      - name: Check broken links
-        run: yarn check-broken-links
-        working-directory: ./docs
````
.gitignore (1 change)

````diff
@@ -116,7 +116,6 @@ celerybeat.pid
 .env
-.envrc
 .venv*
 venv*
 env/
 ENV/
 env.bak/
````
````diff
@@ -9,7 +9,7 @@
     " \n",
     "[Together AI](https://python.langchain.com/docs/integrations/llms/together) has a broad set of OSS LLMs via inference API.\n",
     "\n",
-    "See [here](https://docs.together.ai/docs/inference-models). We use `\"mistralai/Mixtral-8x7B-Instruct-v0.1` for RAG on the Mixtral paper.\n",
+    "See [here](https://api.together.xyz/playground). We use `\"mistralai/Mixtral-8x7B-Instruct-v0.1` for RAG on the Mixtral paper.\n",
     "\n",
     "Download the paper:\n",
     "https://arxiv.org/pdf/2401.04088.pdf"
@@ -148,7 +148,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.6"
+   "version": "3.9.16"
   }
  },
 "nbformat": 4,
````
````diff
@@ -21,4 +21,7 @@ wget -q https://raw.githubusercontent.com/langchain-ai/langgraph/main/README.md
 yarn

-poetry run quarto preview docs
+poetry run quarto render versioned_docs
+poetry run quarto render docs
+
 yarn start
````
````diff
@@ -14,20 +14,19 @@ For the most part, new integrations should be added to the Community package. Pa
 In the following sections, we'll walk through how to contribute to each of these packages from a fake company, `Parrot Link AI`.

-## Community package
+## Community Package

 The `langchain-community` package is in `libs/community` and contains most integrations.

-It can be installed with `pip install langchain-community`, and exported members can be imported with code like
+It is installed by users with `pip install langchain-community`, and exported members can be imported with code like

 ```python
-from langchain_community.chat_models import ChatParrotLink
-from langchain_community.llms import ParrotLinkLLM
+from langchain_community.chat_models import ParrotLinkLLM
+from langchain_community.llms import ChatParrotLink
 from langchain_community.vectorstores import ParrotLinkVectorStore
 ```

-The `community` package relies on manually-installed dependent packages, so you will see errors
-if you try to import a package that is not installed. In our fake example, if you tried to import `ParrotLinkLLM` without installing `parrot-link-sdk`, you will see an `ImportError` telling you to install it when trying to use it.
+The community package relies on manually-installed dependent packages, so you will see errors if you try to import a package that is not installed. In our fake example, if you tried to import `ParrotLinkLLM` without installing `parrot-link-sdk`, you will see an `ImportError` telling you to install it when trying to use it.

 Let's say we wanted to implement a chat model for Parrot Link AI. We would create a new file in `libs/community/langchain_community/chat_models/parrot_link.py` with the following code:
````
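As an aside, a minimal sketch of the kind of guarded import that produces that `ImportError` (the exact mechanism in `langchain-community` may differ; `parrot-link-sdk` is the guide's fake dependency):

```python
# Hypothetical guard inside a community integration module.
class ParrotLinkLLM:
    def __init__(self, **kwargs):
        try:
            import parrot_link_sdk  # noqa: F401  -- the optional dependency
        except ImportError as e:
            raise ImportError(
                "Could not import parrot-link-sdk. "
                "Please install it with `pip install parrot-link-sdk`."
            ) from e
```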
````diff
@@ -40,7 +39,7 @@ class ChatParrotLink(BaseChatModel):
     Example:
         .. code-block:: python

-            from langchain_community.chat_models import ChatParrotLink
+            from langchain_parrot_link import ChatParrotLink

             model = ChatParrotLink()
     """
````
````diff
@@ -57,16 +56,9 @@ And add documentation to:
 - `docs/docs/integrations/chat/parrot_link.ipynb`

-## Partner package in LangChain repo
+## Partner Packages

-Partner packages can be hosted in the `LangChain` monorepo or in an external repo.
-
-Partner package in the `LangChain` repo is placed in `libs/partners/{partner}`
-and the package source code is in `libs/partners/{partner}/langchain_{partner}`.
-
-A package is
-installed by users with `pip install langchain-{partner}`, and the package members
-can be imported with code like:
+Partner packages are in `libs/partners/*` and are installed by users with `pip install langchain-{partner}`, and exported members can be imported with code like

 ```python
 from langchain_{partner} import X
````
````diff
@@ -131,49 +123,13 @@ By default, this will include stubs for a Chat Model, an LLM, and/or a Vector St
 ### Write Unit and Integration Tests

-Some basic tests are presented in the `tests/` directory. You should add more tests to cover your package's functionality.
+Some basic tests are generated in the tests/ directory. You should add more tests to cover your package's functionality.

 For information on running and implementing tests, see the [Testing guide](./testing).

 ### Write documentation

-Documentation is generated from Jupyter notebooks in the `docs/` directory. You should place the notebooks with examples
-to the relevant `docs/docs/integrations` directory in the monorepo root.
-
-### (If Necessary) Deprecate community integration
-
-Note: this is only necessary if you're migrating an existing community integration into
-a partner package. If the component you're integrating is net-new to LangChain (i.e.
-not already in the `community` package), you can skip this step.
-
-Let's pretend we migrated our `ChatParrotLink` chat model from the community package to
-the partner package. We would need to deprecate the old model in the community package.
-
-We would do that by adding a `@deprecated` decorator to the old model as follows, in
-`libs/community/langchain_community/chat_models/parrot_link.py`.
-
-Before our change, our chat model might look like this:
-
-```python
-class ChatParrotLink(BaseChatModel):
-    ...
-```
-
-After our change, it would look like this:
-
-```python
-from langchain_core._api.deprecation import deprecated
-
-@deprecated(
-    since="0.0.<next community version>",
-    removal="0.2.0",
-    alternative_import="langchain_parrot_link.ChatParrotLink"
-)
-class ChatParrotLink(BaseChatModel):
-    ...
-```
-
-You should do this for *each* component that you're migrating to the partner package.
+Documentation is generated from Jupyter notebooks in the `docs/` directory. You should move the generated notebooks to the relevant `docs/docs/integrations` directory in the monorepo root.

 ### Additional steps
@@ -187,15 +143,3 @@ Maintainer steps (Contributors should **not** do these):
 - [ ] set up pypi and test pypi projects
 - [ ] add credential secrets to Github Actions
 - [ ] add package to conda-forge

-## Partner package in external repo
-
-If you are creating a partner package in an external repo, you should follow the same steps as above,
-but you will need to set up your own CI/CD and package management.
-
-Name your package as `langchain-{partner}-{integration}`.
-
-Still, you have to create the `libs/partners/{partner}-{integration}` folder in the `LangChain` monorepo
-and add a `README.md` file with a link to the external repo.
-See this [example](https://github.com/langchain-ai/langchain/tree/master/libs/partners/google-genai).
-This allows keeping track of all the partner packages in the `LangChain` documentation.
````
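For reference, a minimal sketch of what such a `parrot_link.py` chat model stub could look like. The guide's full example is not included in this diff, so the method body below is a placeholder; only the class name and file location come from the text above:

```python
# Hypothetical skeleton of libs/community/langchain_community/chat_models/parrot_link.py
from typing import Any, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatResult


class ChatParrotLink(BaseChatModel):
    """Chat model for the fake Parrot Link AI service."""

    @property
    def _llm_type(self) -> str:
        return "parrot-link-chat"

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        # Placeholder: a real implementation would call parrot-link-sdk here.
        reply = AIMessage(content=messages[-1].content)
        return ChatResult(generations=[ChatGeneration(message=reply)])
```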
````diff
@@ -20,11 +20,9 @@
    ]
   },
   {
-   "cell_type": "code",
+   "cell_type": "raw",
    "id": "0f316b5c",
-   "execution_count": null,
    "metadata": {},
-   "outputs": [],
    "source": [
     "%pip install --upgrade --quiet langchain langchain-openai"
    ]
````
````diff
@@ -20,11 +20,9 @@
    ]
   },
   {
-   "cell_type": "code",
+   "cell_type": "raw",
    "id": "b3121aa8",
-   "execution_count": null,
    "metadata": {},
-   "outputs": [],
    "source": [
     "%pip install --upgrade --quiet langchain langchain-openai"
    ]
````
````diff
@@ -36,11 +36,9 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "raw",
    "id": "b99b47ec",
    "metadata": {},
-   "outputs": [],
    "source": [
     "%pip install --upgrade --quiet langchain-core langchain-openai langchain-anthropic"
    ]
````
````diff
@@ -286,7 +286,7 @@ embeddings = OllamaEmbeddings()
 </TabItem>
 <TabItem value="cohere" label="Cohere (API)" default>

-Make sure you have the `cohere` package installed and the appropriate environment variables set (these are the same as needed for the LLM).
+Make sure you have the `cohere` package installed an the appropriate environment variables set (these are the same as needed for the LLM).

 ```python
 from langchain_community.embeddings import CohereEmbeddings
@@ -563,6 +563,7 @@ from langchain_community.vectorstores import FAISS
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain.tools.retriever import create_retriever_tool
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_openai import ChatOpenAI
 from langchain import hub
 from langchain.agents import create_openai_functions_agent
 from langchain.agents import AgentExecutor
````
````diff
@@ -23,7 +23,7 @@ We also are working to share guides and cookbooks that demonstrate how to use th
 ## LangSmith Evaluation

-LangSmith provides an integrated evaluation and tracing framework that allows you to check for regressions, compare systems, and easily identify and fix any sources of errors and performance issues. Check out the docs on [LangSmith Evaluation](https://docs.smith.langchain.com/evaluation) and additional [cookbooks](https://docs.smith.langchain.com/cookbook) for more detailed information on evaluating your applications.
+LangSmith provides an integrated evaluation and tracing framework that allows you to check for regressions, compare systems, and easily identify and fix any sources of errors and performance issues. Check out the docs on [LangSmith Evaluation](https://docs.smith.langchain.com/category/testing--evaluation) and additional [cookbooks](https://docs.smith.langchain.com/category/langsmith-cookbook) for more detailed information on evaluating your applications.

 ## LangChain benchmarks
````
````diff
@@ -129,7 +129,7 @@
    "Who was famed for their Christian spirit?\n",
    "Who assimilted the Roman language?\n",
    "Who ruled the country of Normandy?\n",
-   "What principality did William the conqueror found?\n",
+   "What principality did William the conquerer found?\n",
    "What is the original meaning of the word Norman?\n",
    "When was the Latin version of the word Norman first recorded?\n",
    "What name comes from the English words Normans/Normanz?\"\"\"\n",
````
````diff
@@ -65,7 +65,6 @@
    "from langchain_core.output_parsers import StrOutputParser\n",
    "\n",
    "llm = ChatMaritalk(\n",
-   "    model=\"sabia-2-medium\",  # Available models: sabia-2-small and sabia-2-medium\n",
    "    api_key=\"\",  # Insert your API key here\n",
    "    temperature=0.7,\n",
    "    max_tokens=100,\n",
````
````diff
@@ -4,7 +4,7 @@
    "cell_type": "raw",
    "source": [
     "---\n",
-    "sidebar_label: Yuan2.0\n",
+    "sidebar_label: YUAN2\n",
     "---"
    ],
    "metadata": {
@@ -22,7 +22,7 @@
    }
   },
   "source": [
-   "# Yuan2.0\n",
+   "# YUAN2.0\n",
    "\n",
    "This notebook shows how to use [YUAN2 API](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/docs/inference_server.md) in LangChain with the langchain.chat_models.ChatYuan2.\n",
    "\n",
@@ -96,9 +96,9 @@
   },
   "source": [
    "### Setting Up Your API server\n",
-   "Setting up your OpenAI compatible API server following [yuan2 openai api server](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/docs/Yuan2_fastchat.md).\n",
-   "If you deployed api server locally, you can simply set `yuan2_api_key=\"EMPTY\"` or anything you want.\n",
-   "Just make sure, the `yuan2_api_base` is set correctly."
+   "Setting up your OpenAI compatible API server following [yuan2 openai api server](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/README-EN.md).\n",
+   "If you deployed api server locally, you can simply set `api_key=\"EMPTY\"` or anything you want.\n",
+   "Just make sure, the `api_base` is set correctly."
   ]
  },
 {
@@ -187,7 +187,7 @@
   },
   "outputs": [],
   "source": [
-   "print(chat.invoke(messages))"
+   "print(chat(messages))"
   ]
  },
 {
@@ -247,7 +247,7 @@
   },
   "outputs": [],
   "source": [
-   "chat.invoke(messages)"
+   "chat(messages)"
   ]
  },
 {
````
````diff
@@ -22,7 +22,7 @@
   "outputs": [],
   "source": [
    "# You need the dgml-utils package to use the DocugamiLoader (run pip install directly without \"poetry run\" if you are not using poetry)\n",
-   "!poetry run pip install docugami-langchain dgml-utils==0.3.0 --upgrade --quiet"
+   "!poetry run pip install dgml-utils==0.3.0 --upgrade --quiet"
   ]
  },
 {
@@ -56,7 +56,7 @@
   "source": [
    "import os\n",
    "\n",
-   "from docugami_langchain.document_loaders import DocugamiLoader"
+   "from langchain_community.document_loaders import DocugamiLoader"
   ]
  },
 {
@@ -470,7 +470,7 @@
   "source": [
    "from typing import Dict, List\n",
    "\n",
-   "from docugami_langchain.document_loaders import DocugamiLoader\n",
+   "from langchain_community.document_loaders import DocugamiLoader\n",
    "from langchain_core.documents import Document\n",
    "\n",
    "loader = DocugamiLoader(docset_id=\"zo954yqy53wp\")\n",
@@ -655,7 +655,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-  "version": "3.9.18"
+  "version": "3.10.1"
  }
 },
 "nbformat": 4,
````
````diff
@@ -1357,9 +1357,7 @@
  {
   "cell_type": "markdown",
   "source": [
-   "## Azure Cosmos DB Semantic Cache\n",
-   "\n",
-   "You can use this integrated [vector database](https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database) for caching."
+   "## Azure Cosmos DB Semantic Cache"
   ],
   "metadata": {
    "collapsed": false
````
````diff
@@ -13,7 +13,7 @@
    "https://api.together.xyz/settings/api-keys. This can be passed in as init param\n",
    "``together_api_key`` or set as environment variable ``TOGETHER_API_KEY``.\n",
    "\n",
-   "Together API reference: https://docs.together.ai/reference"
+   "Together API reference: https://docs.together.ai/reference/inference"
   ]
  },
 {
````
````diff
@@ -45,7 +45,7 @@
   "outputs": [],
   "source": [
    "# default infer_api for a local deployed Yuan2.0 inference server\n",
-   "infer_api = \"http://127.0.0.1:8000/yuan\"\n",
+   "infer_api = \"http://127.0.0.1:8000\"\n",
    "\n",
    "# direct access endpoint in a proxied environment\n",
    "# import os\n",
@@ -56,6 +56,7 @@
    "    max_tokens=2048,\n",
    "    temp=1.0,\n",
    "    top_p=0.9,\n",
    "    top_k=40,\n",
    "    use_history=False,\n",
    ")\n",
    "\n",
@@ -88,7 +89,7 @@
   },
   "outputs": [],
   "source": [
-   "print(yuan_llm.invoke(question))"
+   "print(yuan_llm(question))"
   ]
  }
 ],
````
````diff
@@ -503,21 +503,21 @@ from langchain_google_cloud_sql_pg import PostgreSQLEngine, PostgresVectorStore

 ### Vertex AI Vector Search

-> [Google Cloud Vertex AI Vector Search](https://cloud.google.com/vertex-ai/docs/vector-search/overview) from Google Cloud,
+> [Google Cloud Vertex AI Vector Search](https://cloud.google.com/vertex-ai/docs/matching-engine/overview) from Google Cloud,
 > formerly known as `Vertex AI Matching Engine`, provides the industry's leading high-scale
 > low latency vector database. These vector databases are commonly
 > referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.

-Install the python package:
+We need to install several python packages.

 ```bash
-pip install langchain-google-vertexai
+pip install tensorflow langchain-google-vertexai tensorflow-hub tensorflow-text
 ```

 See a [usage example](/docs/integrations/vectorstores/google_vertex_ai_vector_search).

 ```python
-from langchain_google_vertexai import VectorSearchVectorStore
+from langchain_community.vectorstores import MatchingEngine
 ```

 ### ScaNN
````
````diff
@@ -12,17 +12,13 @@ LangChain integrates with many providers.
 These providers have standalone `langchain-{provider}` packages for improved versioning, dependency management and testing.

 - [AI21](/docs/integrations/providers/ai21)
 - [Airbyte](/docs/integrations/providers/airbyte)
 - [Anthropic](/docs/integrations/platforms/anthropic)
 - [Astra DB](/docs/integrations/providers/astradb)
 - [Elasticsearch](/docs/integrations/providers/elasticsearch)
 - [Exa Search](/docs/integrations/providers/exa_search)
 - [Fireworks](/docs/integrations/providers/fireworks)
 - [Google](/docs/integrations/platforms/google)
 - [Groq](/docs/integrations/providers/groq)
 - [IBM](/docs/integrations/providers/ibm)
 - [MistralAI](/docs/integrations/providers/mistralai)
 - [MongoDB](/docs/integrations/providers/mongodb_atlas)
 - [Nomic](/docs/integrations/providers/nomic)
 - [Nvidia](/docs/integrations/providers/nvidia)
 - [OpenAI](/docs/integrations/platforms/openai)
````
@@ -3,15 +3,6 @@
|
||||
All functionality related to `Microsoft Azure` and other `Microsoft` products.
|
||||
|
||||
## LLMs
|
||||
|
||||
### Azure ML
|
||||
|
||||
See a [usage example](/docs/integrations/llms/azure_ml).
|
||||
|
||||
```python
|
||||
from langchain_community.llms.azureml_endpoint import AzureMLOnlineEndpoint
|
||||
```
|
||||
|
||||
### Azure OpenAI
|
||||
|
||||
See a [usage example](/docs/integrations/llms/azure_openai).
|
||||
|
||||
````diff
@@ -1,30 +0,0 @@
-# Arcee
-
->[Arcee](https://www.arcee.ai/about/about-us) enables the development and advancement
-> of what we coin as SLMs—small, specialized, secure, and scalable language models.
-> By offering a SLM Adaptation System and a seamless, secure integration,
-> `Arcee` empowers enterprises to harness the full potential of
-> domain-adapted language models, driving the transformative
-> innovation in operations.
-
-
-## Installation and Setup
-
-Get your `Arcee API` key.
-
-
-## LLMs
-
-See a [usage example](/docs/integrations/llms/arcee).
-
-```python
-from langchain_community.llms import Arcee
-```
-
-## Retrievers
-
-See a [usage example](/docs/integrations/retrievers/arcee).
-
-```python
-from langchain_community.retrievers import ArceeRetriever
-```
````
````diff
@@ -10,7 +10,12 @@ See a [tutorial provided by DataStax](https://docs.datastax.com/en/astra/astra-d
 Install the following Python package:
 ```bash
-pip install "langchain-astradb>=0.1.0"
+pip install "langchain-astradb>=0.0.1"
 ```

+Some old integrations require the `astrapy` package:
+```bash
+pip install "astrapy>=0.7.1"
+```
+
 Get the [connection secrets](https://docs.datastax.com/en/astra/astra-db-vector/get-started/quickstart.html).
@@ -56,7 +61,7 @@ See the [usage example](/docs/integrations/memory/astradb_chat_message_history#e
 ```python
 from langchain.globals import set_llm_cache
-from langchain_astradb import AstraDBCache
+from langchain_community.cache import AstraDBCache

 set_llm_cache(AstraDBCache(
     api_endpoint=ASTRA_DB_API_ENDPOINT,
@@ -71,7 +76,7 @@ Learn more in the [example notebook](/docs/integrations/llms/llm_caching#astra-d
 ```python
 from langchain.globals import set_llm_cache
-from langchain_astradb import AstraDBSemanticCache
+from langchain_community.cache import AstraDBSemanticCache

 set_llm_cache(AstraDBSemanticCache(
     embedding=my_embedding,
@@ -87,7 +92,7 @@ Learn more in the [example notebook](/docs/integrations/memory/astradb_chat_mess
 ## Document loader

 ```python
-from langchain_astradb import AstraDBLoader
+from langchain_community.document_loaders import AstraDBLoader

 loader = AstraDBLoader(
     collection_name="my_collection",
@@ -124,7 +129,7 @@ Learn more in the [example notebook](/docs/integrations/retrievers/self_query/as
 ## Store

 ```python
-from langchain_astradb import AstraDBStore
+from langchain_community.storage import AstraDBStore

 store = AstraDBStore(
     collection_name="my_kv_store",
@@ -138,7 +143,7 @@ Learn more in the [example notebook](/docs/integrations/stores/astradb#astradbst
 ## Byte Store

 ```python
-from langchain_astradb import AstraDBByteStore
+from langchain_community.storage import AstraDBByteStore

 store = AstraDBByteStore(
     collection_name="my_kv_store",
````
````diff
@@ -1,50 +0,0 @@
-# Baidu
-
->[Baidu Cloud](https://cloud.baidu.com/) is a cloud service provided by `Baidu, Inc.`,
-> headquartered in Beijing. It offers a cloud storage service, client software,
-> file management, resource sharing, and Third Party Integration.
-
-
-## Installation and Setup
-
-Register and get the `Qianfan` `AK` and `SK` keys [here](https://cloud.baidu.com/product/wenxinworkshop).
-
-## LLMs
-
-### Baidu Qianfan
-
-See a [usage example](/docs/integrations/llms/baidu_qianfan_endpoint).
-
-```python
-from langchain_community.llms import QianfanLLMEndpoint
-```
-
-## Chat models
-
-### Qianfan Chat Endpoint
-
-See a [usage example](/docs/integrations/chat/baidu_qianfan_endpoint).
-
-```python
-from langchain_community.chat_models import QianfanChatEndpoint
-```
-
-## Embedding models
-
-### Baidu Qianfan
-
-See a [usage example](/docs/integrations/text_embedding/baidu_qianfan_endpoint).
-
-```python
-from langchain_community.embeddings import QianfanEmbeddingsEndpoint
-```
-
-## Vector stores
-
-### Baidu Cloud ElasticSearch VectorSearch
-
-See a [usage example](/docs/integrations/vectorstores/baiducloud_vector_search).
-
-```python
-from langchain_community.vectorstores import BESVectorStore
-```
````
````diff
@@ -1,30 +0,0 @@
-# CTranslate2
-
->[CTranslate2](https://opennmt.net/CTranslate2/quickstart.html) is a C++ and Python library
-> for efficient inference with Transformer models.
->
->The project implements a custom runtime that applies many performance optimization
-> techniques such as weights quantization, layers fusion, batch reordering, etc.,
-> to accelerate and reduce the memory usage of Transformer models on CPU and GPU.
->
->A full list of features and supported models is included in the
-> [project’s repository](https://opennmt.net/CTranslate2/guides/transformers.html).
-> To start, please check out the official [quickstart guide](https://opennmt.net/CTranslate2/quickstart.html).
-
-
-## Installation and Setup
-
-Install the Python package:
-
-```bash
-pip install ctranslate2
-```
-
-
-## LLMs
-
-See a [usage example](/docs/integrations/llms/ctranslate2).
-
-```python
-from langchain_community.llms import CTranslate2
-```
````
````diff
@@ -8,8 +8,9 @@ It is broken into two parts: installation and setup, and then examples of DeepSp
 - Install the Python package with `pip install deepsparse`
 - Choose a [SparseZoo model](https://sparsezoo.neuralmagic.com/?useCase=text_generation) or export a support model to ONNX [using Optimum](https://github.com/neuralmagic/notebooks/blob/main/notebooks/opt-text-generation-deepsparse-quickstart/OPT_Text_Generation_DeepSparse_Quickstart.ipynb)

-## LLMs
+## Wrappers
+
+### LLM

 There exists a DeepSparse LLM wrapper, which you can access with:
````
````diff
@@ -9,7 +9,6 @@
 ```bash
 pip install dgml-utils
-pip install docugami-langchain
 ```

 ## Document Loader
@@ -17,5 +16,5 @@ pip install docugami-langchain
 See a [usage example](/docs/integrations/document_loaders/docugami).

 ```python
-from docugami_langchain.document_loaders import DocugamiLoader
+from langchain_community.document_loaders import DocugamiLoader
 ```
````
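A minimal usage sketch tying the two imports above together. The `docset_id` value is the sample one from the Docugami notebook earlier in this diff; you would substitute your own:

```python
# The v0.1.13 side of the diff uses the standalone docugami-langchain package:
from docugami_langchain.document_loaders import DocugamiLoader

loader = DocugamiLoader(docset_id="zo954yqy53wp")
docs = loader.load()  # a list of Document objects with Docugami metadata
print(len(docs))
```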
````diff
@@ -1,62 +0,0 @@
-# Eden AI
-
->[Eden AI](https://docs.edenai.co/docs/getting-started-with-eden-ai) user interface (UI)
-> is designed for handling the AI projects. With `Eden AI Portal`,
-> you can perform no-code AI using the best engines for the market.
-
-
-## Installation and Setup
-
-Accessing the Eden AI API requires an API key, which you can get by
-[creating an account](https://app.edenai.run/user/register) and
-heading [here](https://app.edenai.run/admin/account/settings).
-
-## LLMs
-
-See a [usage example](/docs/integrations/llms/edenai).
-
-```python
-from langchain_community.llms import EdenAI
-
-```
-
-## Chat models
-
-See a [usage example](/docs/integrations/chat/edenai).
-
-```python
-from langchain_community.chat_models.edenai import ChatEdenAI
-```
-
-## Embedding models
-
-See a [usage example](/docs/integrations/text_embedding/edenai).
-
-```python
-from langchain_community.embeddings.edenai import EdenAiEmbeddings
-```
-
-## Tools
-
-Eden AI provides a list of tools that grants your Agent the ability to do multiple tasks, such as:
-* speech to text
-* text to speech
-* text explicit content detection
-* image explicit content detection
-* object detection
-* OCR invoice parsing
-* OCR ID parsing
-
-See a [usage example](/docs/integrations/tools/edenai_tools).
-
-```python
-from langchain_community.tools.edenai import (
-    EdenAiExplicitImageTool,
-    EdenAiObjectDetectionTool,
-    EdenAiParsingIDTool,
-    EdenAiParsingInvoiceTool,
-    EdenAiSpeechToTextTool,
-    EdenAiTextModerationTool,
-    EdenAiTextToSpeechTool,
-)
-```
````
````diff
@@ -1,27 +0,0 @@
-# ElevenLabs
-
->[ElevenLabs](https://elevenlabs.io/about) is a voice AI research & deployment company
-> with a mission to make content universally accessible in any language & voice.
->
->`ElevenLabs` creates the most realistic, versatile and contextually-aware
-> AI audio, providing the ability to generate speech in hundreds of
-> new and existing voices in 29 languages.
-
-## Installation and Setup
-
-First, you need to set up an ElevenLabs account. You can follow the
-[instructions here](https://docs.elevenlabs.io/welcome/introduction).
-
-Install the Python package:
-
-```bash
-pip install elevenlabs
-```
-
-## Tools
-
-See a [usage example](/docs/integrations/tools/eleven_labs_tts).
-
-```python
-from langchain_community.tools import ElevenLabsText2SpeechTool
-```
````
````diff
@@ -1,21 +0,0 @@
-# PygmalionAI
-
->[PygmalionAI](https://pygmalion.chat/) is a company supporting the
-> open-source models by serving the inference endpoint
-> for the [Aphrodite Engine](https://github.com/PygmalionAI/aphrodite-engine).
-
-
-## Installation and Setup
-
-
-```bash
-pip install aphrodite-engine
-```
-
-## LLMs
-
-See a [usage example](/docs/integrations/llms/aphrodite).
-
-```python
-from langchain_community.llms import Aphrodite
-```
````
````diff
@@ -12,7 +12,7 @@
    "https://api.together.xyz/settings/api-keys. This can be passed in as init param\n",
    "``together_api_key`` or set as environment variable ``TOGETHER_API_KEY``.\n",
    "\n",
-   "Together API reference: https://docs.together.ai/reference\n",
+   "Together API reference: https://docs.together.ai/reference/inference\n",
    "\n",
    "You will also need to install the `langchain-together` integration package:"
   ]
````
````diff
@@ -1,24 +0,0 @@
-# VoyageAI
-
-All functionality related to VoyageAI
-
->[VoyageAI](https://www.voyageai.com/) Voyage AI builds embedding models, customized for your domain and company, for better retrieval quality.
-> customized for your domain and company, for better retrieval quality.
-
-## Installation and Setup
-
-Install the integration package with
-```bash
-pip install langchain-voyageai
-```
-
-Get an VoyageAI api key and set it as an environment variable (`VOYAGE_API_KEY`)
-
-
-## Text Embedding Model
-
-See a [usage example](/docs/integrations/text_embedding/voyageai)
-
-```python
-from langchain_voyageai import VoyageAIEmbeddings
-```
````
````diff
@@ -28,17 +28,17 @@
   },
   "outputs": [],
   "source": [
-   "%pip install --upgrade --quiet flashrank\n",
-   "%pip install --upgrade --quiet faiss\n",
+   "% pip install --upgrade --quiet flashrank\n",
+   "% pip install --upgrade --quiet faiss\n",
    "\n",
    "# OR (depending on Python version)\n",
    "\n",
-   "%pip install --upgrade --quiet faiss_cpu"
+   "% pip install --upgrade --quiet faiss_cpu"
   ]
  },
 {
   "cell_type": "code",
-  "execution_count": 2,
+  "execution_count": 13,
   "metadata": {
    "collapsed": false,
    "jupyter": {
@@ -53,10 +53,7 @@
    "def pretty_print_docs(docs):\n",
    "    print(\n",
    "        f\"\\n{'-' * 100}\\n\".join(\n",
-   "            [\n",
-   "                f\"Document {i+1}:\\n\\n{d.page_content}\\nMetadata: {d.metadata}\"\n",
-   "                for i, d in enumerate(docs)\n",
-   "            ]\n",
+   "            [f\"Document {i+1}:\\n\\n\" + d.page_content for i, d in enumerate(docs)]\n",
    "        )\n",
    "    )"
@@ -76,7 +73,7 @@
  },
 {
   "cell_type": "code",
-  "execution_count": 3,
+  "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
@@ -93,7 +90,7 @@
  },
 {
   "cell_type": "code",
-  "execution_count": 4,
+  "execution_count": 15,
   "metadata": {
    "collapsed": false,
    "jupyter": {
@@ -250,6 +247,14 @@
+   "----------------------------------------------------------------------------------------------------\n",
+   "Document 15:\n",
+   "\n",
+   "My plan to fight inflation will lower your costs and lower the deficit. \n",
+   "\n",
+   "17 Nobel laureates in economics say my plan will ease long-term inflationary pressures. Top business leaders and most Americans support my plan. And here’s the plan: \n",
+   "\n",
+   "First – cut the cost of prescription drugs. Just look at insulin. One in ten Americans has diabetes. In Virginia, I met a 13-year-old boy named Joshua Davis.\n",
    "----------------------------------------------------------------------------------------------------\n",
    "Document 16:\n",
    "\n",
    "And soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \n",
    "\n",
    "So tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \n",
@@ -258,16 +263,16 @@
    "\n",
    "There is so much we can do. Increase funding for prevention, treatment, harm reduction, and recovery.\n",
-   "----------------------------------------------------------------------------------------------------\n",
-   "Document 16:\n",
-   "\n",
-   "My plan to fight inflation will lower your costs and lower the deficit. \n",
-   "\n",
-   "17 Nobel laureates in economics say my plan will ease long-term inflationary pressures. Top business leaders and most Americans support my plan. And here’s the plan: \n",
-   "\n",
-   "First – cut the cost of prescription drugs. Just look at insulin. One in ten Americans has diabetes. In Virginia, I met a 13-year-old boy named Joshua Davis.\n",
+   "----------------------------------------------------------------------------------------------------\n",
+   "Document 17:\n",
+   "\n",
+   "So let’s not abandon our streets. Or choose between safety and equal justice. \n",
+   "\n",
+   "Let’s come together to protect our communities, restore trust, and hold law enforcement accountable. \n",
+   "\n",
+   "That’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers.\n",
    "----------------------------------------------------------------------------------------------------\n",
    "Document 18:\n",
    "\n",
    "My plan will not only lower costs to give families a fair shot, it will lower the deficit. \n",
    "\n",
    "The previous Administration not only ballooned the deficit with tax cuts for the very wealthy and corporations, it undermined the watchdogs whose job was to keep pandemic relief funds from being wasted. \n",
@@ -276,14 +281,6 @@
    "\n",
    "We’re going after the criminals who stole billions in relief money meant for small businesses and millions of Americans.\n",
-   "----------------------------------------------------------------------------------------------------\n",
-   "Document 18:\n",
-   "\n",
-   "So let’s not abandon our streets. Or choose between safety and equal justice. \n",
-   "\n",
-   "Let’s come together to protect our communities, restore trust, and hold law enforcement accountable. \n",
-   "\n",
-   "That’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers.\n",
    "----------------------------------------------------------------------------------------------------\n",
    "Document 19:\n",
    "\n",
    "I understand. \n",
@@ -319,8 +316,6 @@
    ").load()\n",
    "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
    "texts = text_splitter.split_documents(documents)\n",
-   "for idx, text in enumerate(texts):\n",
-   "    text.metadata[\"id\"] = idx\n",
    "\n",
    "embedding = OpenAIEmbeddings(model=\"text-embedding-ada-002\")\n",
    "retriever = FAISS.from_documents(texts, embedding).as_retriever(search_kwargs={\"k\": 20})\n",
@@ -345,25 +340,16 @@
  },
 {
   "cell_type": "code",
-  "execution_count": 5,
+  "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
-  "outputs": [
-   {
-    "name": "stdout",
-    "output_type": "stream",
-    "text": [
-     "[0, 5, 3]\n"
-    ]
-   }
-  ],
+  "outputs": [],
   "source": [
-   "from langchain.retrievers import ContextualCompressionRetriever\n",
-   "from langchain.retrievers.document_compressors import FlashrankRerank\n",
+   "from langchain.retrievers import ContextualCompressionRetriever, FlashrankRerank\n",
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "llm = ChatOpenAI(temperature=0)\n",
@@ -393,7 +379,7 @@
  },
 {
   "cell_type": "code",
-  "execution_count": 6,
+  "execution_count": 20,
   "metadata": {
    "collapsed": false,
    "jupyter": {
@@ -413,16 +399,6 @@
    "----------------------------------------------------------------------------------------------------\n",
-   "Document 2:\n",
-   "\n",
-   "He met the Ukrainian people. \n",
-   "\n",
-   "From President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \n",
-   "\n",
-   "Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \n",
-   "\n",
-   "In this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight.\n",
-   "----------------------------------------------------------------------------------------------------\n",
    "Document 3:\n",
    "\n",
    "And tonight, I’m announcing that the Justice Department will name a chief prosecutor for pandemic fraud. \n",
    "\n",
    "By the end of this year, the deficit will be down to less than half what it was before I took office. \n",
@@ -433,7 +409,19 @@
    "\n",
    "I’m a capitalist, but capitalism without competition isn’t capitalism. \n",
    "\n",
-   "It’s exploitation—and it drives up prices.\n"
+   "It’s exploitation—and it drives up prices.\n",
+   "----------------------------------------------------------------------------------------------------\n",
+   "Document 3:\n",
+   "\n",
+   "As Ohio Senator Sherrod Brown says, “It’s time to bury the label “Rust Belt.” \n",
+   "\n",
+   "It’s time. \n",
+   "\n",
+   "But with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills. \n",
+   "\n",
+   "Inflation is robbing them of the gains they might otherwise feel. \n",
+   "\n",
+   "I get it. That’s why my top priority is getting prices under control.\n"
   ]
  }
 ],
@@ -455,7 +443,7 @@
  },
 {
   "cell_type": "code",
-  "execution_count": 7,
+  "execution_count": 18,
   "metadata": {
    "collapsed": false,
    "jupyter": {
@@ -471,7 +459,7 @@
  },
 {
   "cell_type": "code",
-  "execution_count": 8,
+  "execution_count": 19,
   "metadata": {
    "collapsed": false,
    "jupyter": {
@@ -483,10 +471,10 @@
    "data": {
     "text/plain": [
      "{'query': 'What did the president say about Ketanji Brown Jackson',\n",
-     " 'result': \"The President mentioned that Ketanji Brown Jackson is one of the nation's top legal minds and will continue Justice Breyer's legacy of excellence.\"}"
+     " 'result': \"The President said that Ketanji Brown Jackson is one of our nation's top legal minds and will continue Justice Breyer's legacy of excellence.\"}"
     ]
    },
-   "execution_count": 8,
+   "execution_count": 19,
    "metadata": {},
    "output_type": "execute_result"
   }
@@ -512,7 +500,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-  "version": "3.12.2"
+  "version": "3.10.12"
  }
 },
 "nbformat": 4,
````
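A minimal sketch of how the imports in this notebook fit together. It assumes an existing `retriever` (built above with FAISS); on the v0.1.13 side the compressor import lives in `langchain.retrievers.document_compressors`:

```python
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import FlashrankRerank

# Wrap a base retriever so FlashRank reranks its results before they are returned.
compressor = FlashrankRerank()
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)
docs = compression_retriever.get_relevant_documents(
    "What did the president say about Ketanji Brown Jackson"
)
```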
````diff
@@ -30,7 +30,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "%pip install --upgrade --quiet google-cloud-discoveryengine"
+   "%pip install --upgrade --quiet google-cloud-discoveryengine"
   ]
  },
 {
@@ -115,12 +115,10 @@
    "  - `global` (default)\n",
    "  - `us`\n",
    "  - `eu`\n",
    "\n",
-   "One of:\n",
-   "- `search_engine_id` - The ID of the search app you want to use. (Required for Blended Search)\n",
    "- `data_store_id` - The ID of the data store you want to use.\n",
+   "  - Note: This was called `search_engine_id` in previous versions of the retriever.\n",
    "\n",
-   "The `project_id`, `search_engine_id` and `data_store_id` parameters can be provided explicitly in the retriever's constructor or through the environment variables - `PROJECT_ID`, `SEARCH_ENGINE_ID` and `DATA_STORE_ID`.\n",
+   "The `project_id` and `data_store_id` parameters can be provided explicitly in the retriever's constructor or through the environment variables - `PROJECT_ID` and `DATA_STORE_ID`.\n",
    "\n",
    "You can also configure a number of optional parameters, including:\n",
    "\n",
@@ -139,17 +137,17 @@
    "- `engine_data_type` - Defines the Vertex AI Search data type\n",
    "  - `0` - Unstructured data\n",
    "  - `1` - Structured data\n",
-   "  - `2` - Website data\n",
-   "  - `3` - [Blended search](https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es#multi-data-stores)\n",
+   "  - `2` - Website data with [Advanced Website Indexing](https://cloud.google.com/generative-ai-app-builder/docs/about-advanced-features#advanced-website-indexing)\n",
    "\n",
    "### Migration guide for `GoogleCloudEnterpriseSearchRetriever`\n",
    "\n",
-   "In previous versions, this retriever was called `GoogleCloudEnterpriseSearchRetriever`.\n",
+   "In previous versions, this retriever was called `GoogleCloudEnterpriseSearchRetriever`. Some backwards-incompatible changes had to be made to the retriever after the General Availability launch due to changes in the product behavior.\n",
    "\n",
    "To update to the new retriever, make the following changes:\n",
    "\n",
    "- Change the import from: `from langchain.retrievers import GoogleCloudEnterpriseSearchRetriever` -> `from langchain.retrievers import GoogleVertexAISearchRetriever`.\n",
-   "- Change all class references from `GoogleCloudEnterpriseSearchRetriever` -> `GoogleVertexAISearchRetriever`.\n"
+   "- Change all class references from `GoogleCloudEnterpriseSearchRetriever` -> `GoogleVertexAISearchRetriever`.\n",
+   "- Upon class initialization, change the `search_engine_id` parameter name to `data_store_id`.\n"
   ]
  },
 {
@@ -172,7 +170,6 @@
    "\n",
    "PROJECT_ID = \"<YOUR PROJECT ID>\"  # Set to your Project ID\n",
    "LOCATION_ID = \"<YOUR LOCATION>\"  # Set to your data store location\n",
-   "SEARCH_ENGINE_ID = \"<YOUR SEARCH APP ID>\"  # Set to your search app ID\n",
    "DATA_STORE_ID = \"<YOUR DATA STORE ID>\"  # Set to your data store ID"
   ]
  },
@@ -284,32 +281,6 @@
    "    print(doc)"
   ]
  },
-{
-  "cell_type": "markdown",
-  "metadata": {},
-  "source": [
-   "### Configure and use the retriever for **blended** data\n"
-  ]
- },
-{
-  "cell_type": "code",
-  "execution_count": null,
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "retriever = GoogleVertexAISearchRetriever(\n",
-   "    project_id=PROJECT_ID,\n",
-   "    location_id=LOCATION_ID,\n",
-   "    search_engine_id=SEARCH_ENGINE_ID,\n",
-   "    max_documents=3,\n",
-   "    engine_data_type=3,\n",
-   ")\n",
-   "\n",
-   "result = retriever.get_relevant_documents(query)\n",
-   "for doc in result:\n",
-   "    print(doc)"
-  ]
- },
 {
   "cell_type": "markdown",
   "metadata": {},
@@ -351,7 +322,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-  "version": "3.11.0"
+  "version": "3.10.12"
  }
 },
 "nbformat": 4,
````
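A minimal sketch of initializing the retriever for a plain data store, assembled from the parameters this notebook documents (the IDs and query string are placeholders):

```python
from langchain.retrievers import GoogleVertexAISearchRetriever

retriever = GoogleVertexAISearchRetriever(
    project_id="<YOUR PROJECT ID>",
    location_id="global",  # or "us" / "eu"
    data_store_id="<YOUR DATA STORE ID>",
    max_documents=3,
    engine_data_type=0,  # 0 = unstructured data
)

query = "your search query"
for doc in retriever.get_relevant_documents(query):
    print(doc)
```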
````diff
@@ -25,21 +25,14 @@
  },
 {
   "cell_type": "code",
-  "execution_count": 1,
-  "metadata": {
-   "ExecuteTime": {
-    "end_time": "2024-03-15T09:36:13.753824100Z",
-    "start_time": "2024-03-15T09:36:13.225834400Z"
-   }
-  },
+  "execution_count": null,
+  "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.embeddings import SparkLLMTextEmbeddings\n",
    "\n",
    "embeddings = SparkLLMTextEmbeddings(\n",
-   "    spark_app_id=\"<spark_app_id>\",\n",
-   "    spark_api_key=\"<spark_api_key>\",\n",
-   "    spark_api_secret=\"<spark_api_secret>\",\n",
+   "    spark_app_id=\"sk-*\", spark_api_key=\"\", spark_api_secret=\"\"\n",
    ")"
   ]
  },
@@ -52,67 +45,44 @@
  },
 {
   "cell_type": "code",
-  "execution_count": 3,
-  "metadata": {
-   "ExecuteTime": {
-    "end_time": "2024-03-15T09:36:25.436201400Z",
-    "start_time": "2024-03-15T09:36:25.313456600Z"
-   }
-  },
-  "outputs": [
-   {
-    "data": {
-     "text/plain": "[-0.043609619140625,\n 0.2017822265625,\n 0.0270843505859375,\n -0.250244140625,\n -0.024993896484375,\n -0.0382080078125,\n 0.06207275390625,\n -0.0146331787109375]"
-    },
-    "execution_count": 3,
-    "metadata": {},
-    "output_type": "execute_result"
-   }
-  ],
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
   "source": [
-   "text_q = \"Introducing iFlytek\"\n",
+   "import os\n",
    "\n",
-   "text_1 = \"Science and Technology Innovation Company Limited, commonly known as iFlytek, is a leading Chinese technology company specializing in speech recognition, natural language processing, and artificial intelligence. With a rich history and remarkable achievements, iFlytek has emerged as a frontrunner in the field of intelligent speech and language technologies.iFlytek has made significant contributions to the advancement of human-computer interaction through its cutting-edge innovations. Their advanced speech recognition technology has not only improved the accuracy and efficiency of voice input systems but has also enabled seamless integration of voice commands into various applications and devices.The company's commitment to research and development has been instrumental in its success. iFlytek invests heavily in fostering talent and collaboration with academic institutions, resulting in groundbreaking advancements in speech synthesis and machine translation. Their dedication to innovation has not only transformed the way we communicate but has also enhanced accessibility for individuals with disabilities.\"\n",
-   "\n",
-   "text_2 = \"Moreover, iFlytek's impact extends beyond domestic boundaries, as they actively promote international cooperation and collaboration in the field of artificial intelligence. They have consistently participated in global competitions and contributed to the development of international standards.In recognition of their achievements, iFlytek has received numerous accolades and awards both domestically and internationally. Their contributions have revolutionized the way we interact with technology and have paved the way for a future where voice-based interfaces play a vital role.Overall, iFlytek is a trailblazer in the field of intelligent speech and language technologies, and their commitment to innovation and excellence deserves commendation.\"\n",
-   "\n",
-   "query_result = embeddings.embed_query(text_q)\n",
-   "query_result[:8]"
+   "os.environ[\"SPARK_APP_ID\"] = \"YOUR_APP_ID\"\n",
+   "os.environ[\"SPARK_API_KEY\"] = \"YOUR_API_KEY\"\n",
+   "os.environ[\"SPARK_API_SECRET\"] = \"YOUR_API_SECRET\""
   ]
  },
 {
   "cell_type": "code",
-  "execution_count": 5,
-  "metadata": {
-   "ExecuteTime": {
-    "end_time": "2024-03-15T09:36:54.657224Z",
-    "start_time": "2024-03-15T09:36:54.404690400Z"
-   }
-  },
-  "outputs": [
-   {
-    "data": {
-     "text/plain": "[-0.161865234375,\n 0.58984375,\n 0.998046875,\n 0.365966796875,\n 0.72900390625,\n 0.6015625,\n -0.8408203125,\n -0.2666015625]"
-    },
-    "execution_count": 5,
-    "metadata": {},
-    "output_type": "execute_result"
-   }
-  ],
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
   "source": [
+   "text_1 = \"iFLYTEK is a well-known intelligent speech and artificial intelligence publicly listed company in the Asia-Pacific Region. Since its establishment, the company is devoted to cornerstone technological research in speech and languages, natural language understanding, machine learning, machine reasoning, adaptive learning, and has maintained the world-leading position in those domains. The company actively promotes the development of A.I. products and their sector-based applications, with visions of enabling machines to listen and speak, understand and think, creating a better world with artificial intelligence.\"\n",
+   "text_2 = \"iFLYTEK Open Platform was launched in 2010 by iFLYTEK as China’s first Artificial Intelligence open platform for Mobile Internet and intelligent hardware developers.\"\n",
    "\n",
+   "query_result = embeddings.embed_query(text_2)\n",
+   "query_result"
   ]
  },
 {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "doc_result = embeddings.embed_documents([text_1, text_2])\n",
-   "doc_result[0][:8]"
+   "doc_result"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  },
  "kernelspec": {
   "name": "python3",
   "language": "python",
   "display_name": "Python 3 (ipykernel)"
  }
 },
 "nbformat": 4,
````
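Pulling the two styles in this diff together, a minimal sketch of configuring the embeddings via environment variables. Credential values are placeholders, and the assumption that the class reads the `SPARK_*` variables comes from the env-var variant shown above:

```python
import os

from langchain_community.embeddings import SparkLLMTextEmbeddings

os.environ["SPARK_APP_ID"] = "YOUR_APP_ID"
os.environ["SPARK_API_KEY"] = "YOUR_API_KEY"
os.environ["SPARK_API_SECRET"] = "YOUR_API_SECRET"

embeddings = SparkLLMTextEmbeddings()  # picks up the SPARK_* variables above
query_result = embeddings.embed_query("Introducing iFlytek")
print(query_result[:8])  # first few dimensions of the embedding vector
```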
````diff
@@ -9,7 +9,7 @@
    "\n",
    ">[Voyage AI](https://www.voyageai.com/) provides cutting-edge embedding/vectorizations models.\n",
    "\n",
-   "Let's load the Voyage Embedding class. (Install the LangChain partner package with `pip install langchain-voyageai`)"
+   "Let's load the Voyage Embedding class."
   ]
  },
 {
@@ -19,7 +19,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "from langchain_voyageai import VoyageAIEmbeddings"
+   "from langchain_community.embeddings import VoyageEmbeddings"
   ]
  },
 {
@@ -37,7 +37,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "embeddings = VoyageAIEmbeddings(\n",
+   "embeddings = VoyageEmbeddings(\n",
    "    voyage_api_key=\"[ Your Voyage API key ]\", model=\"voyage-2\"\n",
    ")"
   ]
````
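A minimal sketch combining the v0.1.13-side import with an embedding call (the API key string is a placeholder, as in the notebook):

```python
from langchain_voyageai import VoyageAIEmbeddings

embeddings = VoyageAIEmbeddings(
    voyage_api_key="[ Your Voyage API key ]", model="voyage-2"
)
query_vector = embeddings.embed_query("This is a test document.")
doc_vectors = embeddings.embed_documents(["This is a test document."])
```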
````diff
@@ -124,7 +124,7 @@
   "outputs": [],
   "source": [
    "from langchain import hub\n",
-   "from langchain.agents import AgentExecutor, create_openai_tools_agent\n",
+   "from langchain.agents import AgentExecutor, create_react_agent\n",
    "from langchain_openai import ChatOpenAI"
   ]
  },
@@ -135,8 +135,8 @@
   "outputs": [],
   "source": [
    "llm = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
-   "prompt = hub.pull(\"hwchase17/openai-tools-agent\")\n",
-   "agent = create_openai_tools_agent(\n",
+   "prompt = hub.pull(\"hwchase17/react\")\n",
+   "agent = create_react_agent(\n",
    "    tools=toolkit.get_tools(),\n",
    "    llm=llm,\n",
    "    prompt=prompt,\n",
@@ -151,9 +151,7 @@
   "outputs": [],
   "source": [
    "agent_executor.invoke(\n",
-   "    {\n",
-   "        \"input\": \"Send a greeting to my coworkers in the #general channel. Note use `channel` as key of channel id, and `message` as key of content to sent in the channel.\"\n",
-   "    }\n",
+   "    {\"input\": \"Send a greeting to my coworkers in the #general channel.\"}\n",
    ")"
   ]
  },
````
@@ -9,13 +9,17 @@
"source": [
"# Azure Cosmos DB\n",
"\n",
"This notebook shows you how to leverage this integrated [vector database](https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database) to store documents in collections, create indices and perform vector search queries using approximate nearest neighbor algorithms such as COS (cosine distance), L2 (Euclidean distance), and IP (inner product) to locate documents close to the query vectors. \n",
">[Azure Cosmos DB for MongoDB vCore](https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/) makes it easy to create a database with full native MongoDB support.\n",
"> You can apply your MongoDB experience and continue to use your favorite MongoDB drivers, SDKs, and tools by pointing your application to the API for MongoDB vCore account's connection string.\n",
"> Use vector search in Azure Cosmos DB for MongoDB vCore to seamlessly integrate your AI-based applications with your data that's stored in Azure Cosmos DB.\n",
"\n",
"This notebook shows you how to leverage the [Vector Search](https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search) capabilities within Azure Cosmos DB for Mongo vCore to store documents in collections, create indices and perform vector search queries using approximate nearest neighbor algorithms such as COS (cosine distance), L2 (Euclidean distance), and IP (inner product) to locate documents close to the query vectors. \n",
" \n",
"Azure Cosmos DB is the database that powers OpenAI's ChatGPT service. It offers single-digit millisecond response times, automatic and instant scalability, along with guaranteed speed at any scale. \n",
"Azure Cosmos DB for MongoDB vCore provides developers with a fully managed MongoDB-compatible database service for building modern applications with a familiar architecture.\n",
"\n",
"Azure Cosmos DB for MongoDB vCore (https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/) provides developers with a fully managed MongoDB-compatible database service for building modern applications with a familiar architecture. You can apply your MongoDB experience and continue to use your favorite MongoDB drivers, SDKs, and tools by pointing your application to the API for MongoDB vCore account's connection string.\n",
"With Cosmos DB for MongoDB vCore, developers can enjoy the benefits of native Azure integrations, low total cost of ownership (TCO), and the familiar vCore architecture when migrating existing applications or building new ones.\n",
"\n",
"[Sign Up](https://azure.microsoft.com/en-us/free/) for lifetime free access to get started today.\n",
"[Sign Up](https://azure.microsoft.com/en-us/free/) for free to get started today.\n",
" "
]
},
@@ -1,787 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "f63dfcf9-fd9d-4ac1-a0b3-c02d4dce7faf",
"metadata": {},
"source": [
"# Couchbase \n",
"[Couchbase](http://couchbase.com/) is an award-winning distributed NoSQL cloud database that delivers unmatched versatility, performance, scalability, and financial value for all of your cloud, mobile, AI, and edge computing applications. Couchbase embraces AI with coding assistance for developers and vector search for their applications.\n",
"\n",
"Vector Search is a part of the [Full Text Search Service](https://docs.couchbase.com/server/current/learn/services-and-indexes/services/search-service.html) (Search Service) in Couchbase.\n",
"\n",
"This tutorial explains how to use Vector Search in Couchbase. You can work with both [Couchbase Capella](https://www.couchbase.com/products/capella/) and your self-managed Couchbase Server."
]
},
{
"cell_type": "markdown",
"id": "43326be4-4433-4de2-ad42-6eb91a722bad",
"metadata": {},
"source": [
"## Installation"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bec8d532-fec7-4dc7-9be3-020aa7bdb01f",
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain langchain-openai couchbase"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a972cbc-bf59-46eb-9b50-e5dc3a69dcf0",
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
]
},
{
"cell_type": "markdown",
"id": "acf1b168-622f-465c-a9a5-d27a6d7e7a8f",
"metadata": {},
"source": [
"## Import the Vector Store and Embeddings"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "23ce45ab-bfd2-42e1-b681-514a550f0232",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.vectorstores import CouchbaseVectorStore\n",
"from langchain_openai import OpenAIEmbeddings"
]
},
{
"cell_type": "markdown",
"id": "3144ba02-1eaa-4449-853e-f034ca5706bf",
"metadata": {},
"source": [
"## Create Couchbase Connection Object\n",
"We create a connection to the Couchbase cluster initially and then pass the cluster object to the Vector Store. \n",
"\n",
"Here, we are connecting using the username and password. You can also connect to your cluster using any other supported method. \n",
"\n",
"For more information on connecting to the Couchbase cluster, please check the [Python SDK documentation](https://docs.couchbase.com/python-sdk/current/hello-world/start-using-sdk.html#connect)."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "52fe583a-12db-4dc2-9281-1174bf1d4e5c",
"metadata": {},
"outputs": [],
"source": [
"COUCHBASE_CONNECTION_STRING = (\n",
"    \"couchbase://localhost\"  # or \"couchbases://localhost\" if using TLS\n",
")\n",
"DB_USERNAME = \"Administrator\"\n",
"DB_PASSWORD = \"Password\""
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9986c6b9",
"metadata": {},
"outputs": [],
"source": [
"from datetime import timedelta\n",
"\n",
"from couchbase.auth import PasswordAuthenticator\n",
"from couchbase.cluster import Cluster\n",
"from couchbase.options import ClusterOptions\n",
"\n",
"auth = PasswordAuthenticator(DB_USERNAME, DB_PASSWORD)\n",
"options = ClusterOptions(auth)\n",
"cluster = Cluster(COUCHBASE_CONNECTION_STRING, options)\n",
"\n",
"# Wait until the cluster is ready for use.\n",
"cluster.wait_until_ready(timedelta(seconds=5))"
]
},
{
"cell_type": "markdown",
"id": "90c5dec9-f6cb-41eb-9f30-13cab7b107db",
"metadata": {},
"source": [
"We will now set the bucket, scope, and collection names in the Couchbase cluster that we want to use for Vector Search. \n",
"\n",
"For this example, we are using the default scope & collection."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1b1d0a26-e9d4-4823-9800-9549d24d3d16",
"metadata": {},
"outputs": [],
"source": [
"BUCKET_NAME = \"testing\"\n",
"SCOPE_NAME = \"_default\"\n",
"COLLECTION_NAME = \"_default\"\n",
"SEARCH_INDEX_NAME = \"vector-index\""
]
},
{
"cell_type": "markdown",
"id": "efbac6ff-c2ac-4443-9250-7cc88061346b",
"metadata": {},
"source": [
"For this tutorial, we will use OpenAI embeddings."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "87625579-86d7-4de4-8a4d-cee674a6b676",
"metadata": {},
"outputs": [],
"source": [
"embeddings = OpenAIEmbeddings()"
]
},
{
"cell_type": "markdown",
"id": "3677b4b0-3711-419c-89ff-32ef4d3e3022",
"metadata": {},
"source": [
"## Create the Search Index\n",
"Currently, the Search index needs to be created from the Couchbase Capella or Server UI or using the REST interface. \n",
"\n",
"Let us define a Search index with the name `vector-index` on the `testing` bucket.\n",
"\n",
"For this example, let us use the Import Index feature of the Search Service in the UI. \n",
"\n",
"We are defining an index on the `testing` bucket's `_default` scope on the `_default` collection with the vector field set to `embedding` with 1536 dimensions and the text field set to `text`. We are also indexing and storing all the fields under `metadata` in the document as a dynamic mapping to account for varying document structures. The similarity metric is set to `dot_product`."
]
},
{
"cell_type": "markdown",
"id": "655117ae-9b1f-4139-b437-ca7685975a54",
"metadata": {},
"source": [
"### How to Import an Index to the Full Text Search service?\n",
" - [Couchbase Server](https://docs.couchbase.com/server/current/search/import-search-index.html)\n",
"   - Click on Search -> Add Index -> Import\n",
"   - Copy the following Index definition in the Import screen\n",
"   - Click on Create Index to create the index.\n",
" - [Couchbase Capella](https://docs.couchbase.com/cloud/search/import-search-index.html)\n",
"   - Copy the index definition to a new file `index.json`\n",
"   - Import the file in Capella using the instructions in the documentation.\n",
"   - Click on Create Index to create the index.\n",
" \n"
]
},
{
"cell_type": "markdown",
"id": "f85bc468-d9b8-487d-999a-3b5d2fb78e41",
"metadata": {},
"source": [
"### Index Definition\n",
"```\n",
"{\n",
"  \"name\": \"vector-index\",\n",
"  \"type\": \"fulltext-index\",\n",
"  \"params\": {\n",
"    \"doc_config\": {\n",
"      \"docid_prefix_delim\": \"\",\n",
"      \"docid_regexp\": \"\",\n",
"      \"mode\": \"type_field\",\n",
"      \"type_field\": \"type\"\n",
"    },\n",
"    \"mapping\": {\n",
"      \"default_analyzer\": \"standard\",\n",
"      \"default_datetime_parser\": \"dateTimeOptional\",\n",
"      \"default_field\": \"_all\",\n",
"      \"default_mapping\": {\n",
"        \"dynamic\": true,\n",
"        \"enabled\": true,\n",
"        \"properties\": {\n",
"          \"metadata\": {\n",
"            \"dynamic\": true,\n",
"            \"enabled\": true\n",
"          },\n",
"          \"embedding\": {\n",
"            \"enabled\": true,\n",
"            \"dynamic\": false,\n",
"            \"fields\": [\n",
"              {\n",
"                \"dims\": 1536,\n",
"                \"index\": true,\n",
"                \"name\": \"embedding\",\n",
"                \"similarity\": \"dot_product\",\n",
"                \"type\": \"vector\",\n",
"                \"vector_index_optimized_for\": \"recall\"\n",
"              }\n",
"            ]\n",
"          },\n",
"          \"text\": {\n",
"            \"enabled\": true,\n",
"            \"dynamic\": false,\n",
"            \"fields\": [\n",
"              {\n",
"                \"index\": true,\n",
"                \"name\": \"text\",\n",
"                \"store\": true,\n",
"                \"type\": \"text\"\n",
"              }\n",
"            ]\n",
"          }\n",
"        }\n",
"      },\n",
"      \"default_type\": \"_default\",\n",
"      \"docvalues_dynamic\": false,\n",
"      \"index_dynamic\": true,\n",
"      \"store_dynamic\": true,\n",
"      \"type_field\": \"_type\"\n",
"    },\n",
"    \"store\": {\n",
"      \"indexType\": \"scorch\",\n",
"      \"segmentVersion\": 16\n",
"    }\n",
"  },\n",
"  \"sourceType\": \"gocbcore\",\n",
"  \"sourceName\": \"testing\",\n",
"  \"sourceParams\": {},\n",
"  \"planParams\": {\n",
"    \"maxPartitionsPerPIndex\": 103,\n",
"    \"indexPartitions\": 10,\n",
"    \"numReplicas\": 0\n",
"  }\n",
"}\n",
"```"
]
},
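Editor's note: the cell above mentions that the index can also be created over the REST interface. A minimal sketch of that route, assuming a self-managed Couchbase Server with the Search service on its default port 8094 and the definition above saved as `index.json`; the host, port, and credentials here are placeholders, so check the Search REST API reference for your deployment:

```bash
# Hypothetical host and credentials: adjust for your cluster.
curl -XPUT -u Administrator:Password \
  -H "Content-Type: application/json" \
  http://localhost:8094/api/index/vector-index \
  -d @index.json
```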
{
"cell_type": "markdown",
"id": "556dc68c-9089-4390-8dc9-b77051e7fc34",
"metadata": {},
"source": [
"For more details on how to create a Search index with support for Vector fields, please refer to the documentation.\n",
"\n",
"- [Couchbase Capella](https://docs.couchbase.com/cloud/vector-search/create-vector-search-index-ui.html)\n",
"  \n",
"- [Couchbase Server](https://docs.couchbase.com/server/current/vector-search/create-vector-search-index-ui.html)"
]
},
{
"cell_type": "markdown",
"id": "75f4037d-e509-4de7-a8d1-63a05de24e9d",
"metadata": {},
"source": [
"## Create Vector Store\n",
"We create the vector store object with the cluster information and the search index name."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "33db4670-76c5-49ba-94d6-a8fa35583058",
"metadata": {},
"outputs": [],
"source": [
"vector_store = CouchbaseVectorStore(\n",
"    cluster=cluster,\n",
"    bucket_name=BUCKET_NAME,\n",
"    scope_name=SCOPE_NAME,\n",
"    collection_name=COLLECTION_NAME,\n",
"    embedding=embeddings,\n",
"    index_name=SEARCH_INDEX_NAME,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "0aa98793-5ac2-4f76-bbba-2d40856c2d58",
"metadata": {},
"source": [
"### Specify the Text & Embeddings Field\n",
"You can optionally specify the text & embedding fields for the document using the `text_key` and `embedding_key` fields.\n",
"```\n",
"vector_store = CouchbaseVectorStore(\n",
"    cluster=cluster,\n",
"    bucket_name=BUCKET_NAME,\n",
"    scope_name=SCOPE_NAME,\n",
"    collection_name=COLLECTION_NAME,\n",
"    embedding=embeddings,\n",
"    index_name=SEARCH_INDEX_NAME,\n",
"    text_key=\"text\",\n",
"    embedding_key=\"embedding\",\n",
")\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "790dc1ac-0ab8-4cb5-989d-31ca7c241068",
"metadata": {},
"source": [
"## Basic Vector Search Example\n",
"For this example, we are going to load the \"state_of_the_union.txt\" file via the TextLoader, chunk the text into 500-character chunks with no overlap and index all these chunks into Couchbase.\n",
"\n",
"After the data is indexed, we perform a simple query to find the top 4 chunks that are similar to the query \"What did president say about Ketanji Brown Jackson\".\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "440350df-cbc6-48f7-8009-2e783be18306",
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n",
"\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "9d3b4c7c-abd6-4dfa-ad63-470f16661319",
"metadata": {},
"outputs": [],
"source": [
"vector_store = CouchbaseVectorStore.from_documents(\n",
"    documents=docs,\n",
"    embedding=embeddings,\n",
"    cluster=cluster,\n",
"    bucket_name=BUCKET_NAME,\n",
"    scope_name=SCOPE_NAME,\n",
"    collection_name=COLLECTION_NAME,\n",
"    index_name=SEARCH_INDEX_NAME,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "91fdce6c-8f7c-4060-865a-2fd742846664",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"page_content='One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.' metadata={'source': '../../modules/state_of_the_union.txt'}\n"
]
}
],
"source": [
"query = \"What did president say about Ketanji Brown Jackson\"\n",
"results = vector_store.similarity_search(query)\n",
"print(results[0])"
]
},
{
"cell_type": "markdown",
"id": "d9b46c93-65f6-4e4f-87a2-5cebea3b7a6b",
"metadata": {},
"source": [
"## Similarity Search with Score\n",
"You can fetch the scores for the results by calling the `similarity_search_with_score` method."
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "24b146b2-55a2-4fe8-8659-3649032f5dc7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"page_content='One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.' metadata={'source': '../../modules/state_of_the_union.txt'}\n",
"Score: 0.8211871385574341\n"
]
}
],
"source": [
"query = \"What did president say about Ketanji Brown Jackson\"\n",
"results = vector_store.similarity_search_with_score(query)\n",
"document, score = results[0]\n",
"print(document)\n",
"print(f\"Score: {score}\")"
]
},
{
"cell_type": "markdown",
"id": "9983e83d-efd0-4b75-80db-150e0694e822",
"metadata": {},
"source": [
"## Specifying Fields to Return\n",
"You can specify the fields to return from the document using the `fields` parameter in the searches. These fields are returned as part of the `metadata` object in the returned Document. You can fetch any field that is stored in the Search index. The `text_key` of the document is returned as part of the document's `page_content`.\n",
"\n",
"If you do not specify any fields to be fetched, all the fields stored in the index are returned.\n",
"\n",
"If you want to fetch one of the fields in the metadata, you need to specify it using dot notation (`.`).\n",
"\n",
"For example, to fetch the `source` field in the metadata, you need to specify `metadata.source`.\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "ffa743dc-4e89-405b-ad71-7390338889e6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"page_content='One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.' metadata={'source': '../../modules/state_of_the_union.txt'}\n"
]
}
],
"source": [
"query = \"What did president say about Ketanji Brown Jackson\"\n",
"results = vector_store.similarity_search(query, fields=[\"metadata.source\"])\n",
"print(results[0])"
]
},
{
"cell_type": "markdown",
"id": "a5e45eb2-aa97-45df-bcc5-410e9626e506",
"metadata": {},
"source": [
"## Hybrid Search\n",
"Couchbase allows you to do hybrid searches by combining Vector Search results with searches on non-vector fields of the document like the `metadata` object. \n",
"\n",
"The results will be based on the combination of the results from both Vector Search and the searches supported by Search Service. The scores of each of the component searches are added up to get the total score of the result.\n",
"\n",
"To perform hybrid searches, there is an optional parameter, `search_options`, that can be passed to all the similarity searches. \n",
"The different search/query possibilities for the `search_options` can be found [here](https://docs.couchbase.com/server/current/search/search-request-params.html#query-object)."
]
},
{
"cell_type": "markdown",
"id": "a5db3685-1918-4c63-8148-0bb3a71ea677",
"metadata": {},
"source": [
"### Create Diverse Metadata for Hybrid Search\n",
"In order to simulate hybrid search, let us create some random metadata from the existing documents. \n",
"We uniformly add three fields to the metadata: `date` between 2010 & 2020, `rating` between 1 & 5, and `author` set to either John Doe or Jane Doe. "
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "7d2e607d-6bbc-4cef-83e3-b6a28bb269ea",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'author': 'John Doe', 'date': '2016-01-01', 'rating': 2, 'source': '../../modules/state_of_the_union.txt'}\n"
]
}
],
"source": [
"# Adding metadata to documents\n",
"for i, doc in enumerate(docs):\n",
"    doc.metadata[\"date\"] = f\"{range(2010, 2020)[i % 10]}-01-01\"\n",
"    doc.metadata[\"rating\"] = range(1, 6)[i % 5]\n",
"    doc.metadata[\"author\"] = [\"John Doe\", \"Jane Doe\"][i % 2]\n",
"\n",
"vector_store.add_documents(docs)\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"results = vector_store.similarity_search(query)\n",
"print(results[0].metadata)"
]
},
{
"cell_type": "markdown",
"id": "6cad893b-3977-4556-ab1d-d12bce68b306",
"metadata": {},
"source": [
"### Example: Search by Exact Value\n",
"We can search for exact matches on a textual field like the author in the `metadata` object."
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "dc06ba4a-8a6b-4c55-bb69-95cd92db273f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"page_content='This is personal to me and Jill, to Kamala, and to so many of you. \\n\\nCancer is the #2 cause of death in America–second only to heart disease. \\n\\nLast month, I announced our plan to supercharge \\nthe Cancer Moonshot that President Obama asked me to lead six years ago. \\n\\nOur goal is to cut the cancer death rate by at least 50% over the next 25 years, turn more cancers from death sentences into treatable diseases. \\n\\nMore support for patients and families.' metadata={'author': 'John Doe'}\n"
]
}
],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"results = vector_store.similarity_search(\n",
"    query,\n",
"    search_options={\"query\": {\"field\": \"metadata.author\", \"match\": \"John Doe\"}},\n",
"    fields=[\"metadata.author\"],\n",
")\n",
"print(results[0])"
]
},
{
"cell_type": "markdown",
"id": "9106b594-b41e-4329-b98c-9b9f8a34d6f7",
"metadata": {},
"source": [
"### Example: Search by Partial Match\n",
"We can search for partial matches by specifying a fuzziness for the search. This is useful when you want to search for slight variations or misspellings of a search query.\n",
"\n",
"Here, \"Jae\" is close (fuzziness of 1) to \"Jane\"."
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "fd4749e6-ef4f-4cb5-95ff-37c4fa8283d8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system.' metadata={'author': 'Jane Doe'}\n"
]
}
],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"results = vector_store.similarity_search(\n",
"    query,\n",
"    search_options={\n",
"        \"query\": {\"field\": \"metadata.author\", \"match\": \"Jae\", \"fuzziness\": 1}\n",
"    },\n",
"    fields=[\"metadata.author\"],\n",
")\n",
"print(results[0])"
]
},
{
"cell_type": "markdown",
"id": "1bbf9449-6e30-4bd1-9eeb-f3b60952fcab",
"metadata": {},
"source": [
"### Example: Search by Date Range Query\n",
"We can search for documents that are within a date range on a date field like `metadata.date`."
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "b7b47e7d-c32f-4999-bce9-3c3c3cebffd0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"page_content='He will never extinguish their love of freedom. He will never weaken the resolve of the free world. \\n\\nWe meet tonight in an America that has lived through two of the hardest years this nation has ever faced. \\n\\nThe pandemic has been punishing. \\n\\nAnd so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. \\n\\nI understand.' metadata={'author': 'Jane Doe', 'date': '2017-01-01', 'rating': 3, 'source': '../../modules/state_of_the_union.txt'}\n"
]
}
],
"source": [
"query = \"Any mention about independence?\"\n",
"results = vector_store.similarity_search(\n",
"    query,\n",
"    search_options={\n",
"        \"query\": {\n",
"            \"start\": \"2016-12-31\",\n",
"            \"end\": \"2017-01-02\",\n",
"            \"inclusive_start\": True,\n",
"            \"inclusive_end\": False,\n",
"            \"field\": \"metadata.date\",\n",
"        }\n",
"    },\n",
")\n",
"print(results[0])"
]
},
{
"cell_type": "markdown",
"id": "a18d4ea2-bfab-4f15-9839-674faf1c6f0d",
"metadata": {},
"source": [
"### Example: Search by Numeric Range Query\n",
"We can search for documents that are within a range for a numeric field like `metadata.rating`."
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "7e8bf7c5-07d1-4c3f-86d7-1fa3a454dc7f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(Document(page_content='He will never extinguish their love of freedom. He will never weaken the resolve of the free world. \\n\\nWe meet tonight in an America that has lived through two of the hardest years this nation has ever faced. \\n\\nThe pandemic has been punishing. \\n\\nAnd so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. \\n\\nI understand.', metadata={'author': 'Jane Doe', 'date': '2017-01-01', 'rating': 3, 'source': '../../modules/state_of_the_union.txt'}), 0.9000703597577832)\n"
]
}
],
"source": [
"query = \"Any mention about independence?\"\n",
"results = vector_store.similarity_search_with_score(\n",
"    query,\n",
"    search_options={\n",
"        \"query\": {\n",
"            \"min\": 3,\n",
"            \"max\": 5,\n",
"            \"inclusive_min\": True,\n",
"            \"inclusive_max\": True,\n",
"            \"field\": \"metadata.rating\",\n",
"        }\n",
"    },\n",
")\n",
"print(results[0])"
]
},
{
"cell_type": "markdown",
"id": "0f16bf86-f01c-4a77-8406-275f7313f493",
"metadata": {},
"source": [
"### Example: Combining Multiple Search Queries\n",
"Different search queries can be combined using AND (conjuncts) or OR (disjuncts) operators.\n",
"\n",
"In this example, we are checking for documents with a rating between 3 & 4 and dated between 2015 & 2018."
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "dd0fe7f1-aa40-4c6f-889b-99ad5efcd88b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(Document(page_content='He will never extinguish their love of freedom. He will never weaken the resolve of the free world. \\n\\nWe meet tonight in an America that has lived through two of the hardest years this nation has ever faced. \\n\\nThe pandemic has been punishing. \\n\\nAnd so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. \\n\\nI understand.', metadata={'author': 'Jane Doe', 'date': '2017-01-01', 'rating': 3, 'source': '../../modules/state_of_the_union.txt'}), 1.3598770370389914)\n"
]
}
],
"source": [
"query = \"Any mention about independence?\"\n",
"results = vector_store.similarity_search_with_score(\n",
"    query,\n",
"    search_options={\n",
"        \"query\": {\n",
"            \"conjuncts\": [\n",
"                {\"min\": 3, \"max\": 4, \"inclusive_max\": True, \"field\": \"metadata.rating\"},\n",
"                {\"start\": \"2016-12-31\", \"end\": \"2017-01-02\", \"field\": \"metadata.date\"},\n",
"            ]\n",
"        }\n",
"    },\n",
")\n",
"print(results[0])"
]
},
{
"cell_type": "markdown",
"id": "39258571-3233-45c3-a6ad-5c3c90ea2b1c",
"metadata": {},
"source": [
"### Other Queries\n",
"Similarly, you can use any of the supported Query methods like Geo Distance, Polygon Search, Wildcard, Regular Expressions, etc. in the `search_options` parameter. Please refer to the documentation for more details on the available query methods and their syntax.\n",
"\n",
"- [Couchbase Capella](https://docs.couchbase.com/cloud/search/search-request-params.html#query-object)\n",
"- [Couchbase Server](https://docs.couchbase.com/server/current/search/search-request-params.html#query-object)"
]
},
{
"cell_type": "markdown",
"id": "80958c2b-6a67-45e6-b7f0-fd2461d75e0f",
"metadata": {},
"source": [
"# Frequently Asked Questions"
]
},
{
"cell_type": "markdown",
"id": "4f7f9838-cc20-44bc-a72d-06f2cb6c3fca",
"metadata": {},
"source": [
"## Question: Should I create the Search index before creating the CouchbaseVectorStore object?\n",
"Yes, currently you need to create the Search index before creating the `CouchbaseVectorStore` object.\n"
]
},
{
"cell_type": "markdown",
"id": "3f0dbc1b-9e82-4ec3-9330-6b54de00661e",
"metadata": {},
"source": [
"## Question: I am not seeing all the fields that I specified in my search results. \n",
"\n",
"In Couchbase, we can only return the fields stored in the Search index. Please ensure that the field that you are trying to access in the search results is part of the Search index.\n",
"\n",
"One way to handle this is to index and store a document's fields dynamically in the index. \n",
"\n",
"- In Capella, you need to go to \"Advanced Mode\" then under the chevron \"General Settings\" you can check \"[X] Store Dynamic Fields\" or \"[X] Index Dynamic Fields\"\n",
"- In Couchbase Server, in the Index Editor (not Quick Editor) under the chevron \"Advanced\" you can check \"[X] Store Dynamic Fields\" or \"[X] Index Dynamic Fields\"\n",
"\n",
"Note that these options will increase the size of the index.\n",
"\n",
"For more details on dynamic mappings, please refer to the [documentation](https://docs.couchbase.com/cloud/search/customize-index.html).\n"
]
},
{
"cell_type": "markdown",
"id": "3702977a-2e25-48b6-b662-edd5cb94cdec",
"metadata": {},
"source": [
"## Question: I am unable to see the metadata object in my search results. \n",
"This is most likely due to the `metadata` field in the document not being indexed and/or stored by the Couchbase Search index. In order to index the `metadata` field in the document, you need to add it to the index as a child mapping. \n",
"\n",
"If you choose to map all the fields in the mapping, you will be able to search by all metadata fields. Alternatively, to optimize the index, you can select the specific fields inside the `metadata` object to be indexed. You can refer to the [docs](https://docs.couchbase.com/cloud/search/customize-index.html) to learn more about indexing child mappings.\n",
"\n",
"Creating Child Mappings\n",
"\n",
"* [Couchbase Capella](https://docs.couchbase.com/cloud/search/create-child-mapping.html)\n",
"* [Couchbase Server](https://docs.couchbase.com/server/current/search/create-child-mapping.html)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -9,7 +9,7 @@
"\n",
"This notebook shows how to use functionality related to the `Google Cloud Vertex AI Vector Search` vector database.\n",
"\n",
"> [Google Vertex AI Vector Search](https://cloud.google.com/vertex-ai/docs/vector-search/overview), formerly known as Vertex AI Matching Engine, provides the industry's leading high-scale low latency vector database. These vector databases are commonly referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.\n",
"> [Google Vertex AI Vector Search](https://cloud.google.com/vertex-ai/docs/matching-engine/overview), formerly known as Vertex AI Matching Engine, provides the industry's leading high-scale low latency vector database. These vector databases are commonly referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.\n",
"\n",
"**Note**: This module expects an endpoint and a deployed index to have been created already, as index creation takes close to one hour. To see how to create an index, refer to the section [Create Index and deploy it to an Endpoint](#create-index-and-deploy-it-to-an-endpoint)"
]
@@ -29,7 +29,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain_google_vertexai import VectorSearchVectorStore"
"from langchain_community.vectorstores import MatchingEngine"
]
},
{
@@ -50,7 +50,7 @@
"]\n",
"\n",
"\n",
"vector_store = VectorSearchVectorStore.from_components(\n",
"vector_store = MatchingEngine.from_components(\n",
"    texts=texts,\n",
"    project_id=\"<my_project_id>\",\n",
"    region=\"<my_region>\",\n",
@@ -37,21 +37,9 @@
"\n",
"To run this demo we need a running Infinispan instance without authentication and a data file.\n",
"In the next three cells we're going to:\n",
"- download the data file\n",
"- create the configuration\n",
"- run Infinispan in docker"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9678d5ce-894c-4e28-bf68-20d45507122f",
"metadata": {},
"outputs": [],
"source": [
"%%bash\n",
"#get an archive of news\n",
"wget https://raw.githubusercontent.com/rigazilla/infinispan-vector/main/bbc_news.csv.gz"
"- run Infinispan in docker\n",
"- download the data file"
]
},
{
@@ -88,6 +76,18 @@
"' > infinispan-noauth.yaml"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9678d5ce-894c-4e28-bf68-20d45507122f",
"metadata": {},
"outputs": [],
"source": [
"%%bash\n",
"#get an archive of news\n",
"wget https://raw.githubusercontent.com/rigazilla/infinispan-vector/main/bbc_news.csv.gz"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -95,8 +95,7 @@
"metadata": {},
"outputs": [],
"source": [
"!docker rm --force infinispanvs-demo\n",
"!docker run -d --name infinispanvs-demo -v $(pwd):/user-config -p 11222:11222 infinispan/server:15.0 -c /user-config/infinispan-noauth.yaml"
"!docker run -d --name infinispanvs-demo -v $(pwd):/user-config -p 11222:11222 infinispan/server:15.0.0.Dev09 -c /user-config/infinispan-noauth.yaml "
]
},
{
@@ -134,8 +133,80 @@
"## Setup Infinispan cache\n",
"\n",
"Infinispan is a very flexible key-value store; it can store raw bits as well as complex data types.\n",
"Users have complete freedom in the datagrid configuration, but for simple data types everything is automatically\n",
"configured by the Python layer. We take advantage of this feature so we can focus on our application."
"We need to configure it to store data containing embedded vectors.\n",
"\n",
"In the next cells we're going to:\n",
"- create an empty Infinispan VectorStore\n",
"- deploy a protobuf definition of our data\n",
"- create a cache"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "49668bf1-778b-466d-86fb-41747ed52b74",
"metadata": {},
"outputs": [],
"source": [
"# Creating a langchain_core.VectorStore\n",
"from langchain_community.vectorstores import InfinispanVS\n",
"\n",
"ispnvs = InfinispanVS.from_texts(\n",
"    texts={}, embedding=hf, cache_name=\"demo_cache\", entity_name=\"demo_entity\"\n",
")\n",
"ispn = ispnvs.ispn"
]
},
{
"cell_type": "markdown",
"id": "0cedf066-aaab-4185-b049-93eea9b48329",
"metadata": {},
"source": [
"### Protobuf definition\n",
"\n",
"Below there's the protobuf definition of our data type that contains:\n",
"- embedded vector (field 1)\n",
"- text of the news (2)\n",
"- title of the news (3)\n",
"\n",
"As you can see, there are additional annotations in the comments that tell Infinispan that:\n",
"- data type must be indexed (`@Indexed`)\n",
"- field 1 is an embedded vector (`@Vector`)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1fa0add0-8317-4667-9b8c-5d91c47f752a",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"# Infinispan supports protobuf schemas\n",
"schema_vector = \"\"\"\n",
"/**\n",
" * @Indexed\n",
" */\n",
"message demo_entity {\n",
"/**\n",
" * @Vector(dimension=384)\n",
" */\n",
"repeated float vector = 1;\n",
"optional string text = 2;\n",
"optional string title = 3;\n",
"}\n",
"\"\"\"\n",
"# Clean up before deploying a new schema\n",
"ispnvs.schema_delete()\n",
"output = ispnvs.schema_create(schema_vector)\n",
"assert output.status_code == 200\n",
"assert json.loads(output.text)[\"error\"] is None\n",
"# Create the cache\n",
"ispnvs.cache_create()\n",
"# Clean up old data and index\n",
"ispnvs.cache_clear()\n",
"ispnvs.cache_index_reindex()"
]
},
{
@@ -145,7 +216,8 @@
"source": [
"## Prepare the data\n",
"\n",
"In this demo we rely on the default configuration, thus texts, metadatas and vectors live in the same cache, but other options are possible: e.g. content can be stored somewhere else and the vector store could contain only a reference to the actual content."
"In this demo we choose to store text, vector and metadata in the same cache, but other options\n",
"are possible: e.g. content can be stored somewhere else and the vector store could contain only a reference to the actual content."
]
},
{
@@ -167,12 +239,15 @@
"    metas = []\n",
"    embeds = []\n",
"    for row in spamreader:\n",
"        # first and fifth values are joined to form the content\n",
"        # first and fifth value are joined to form the content\n",
"        # to be processed\n",
"        text = row[0] + \".\" + row[4]\n",
"        texts.append(text)\n",
"        # Storing meta\n",
"        # Store text and title as metadata\n",
"        meta = {\"text\": row[4], \"title\": row[0]}\n",
"        meta = {}\n",
"        meta[\"text\"] = row[4]\n",
"        meta[\"title\"] = row[0]\n",
"        metas.append(meta)\n",
"        i = i + 1\n",
"        # Change this to change the number of news items you want to load\n",
@@ -196,10 +271,7 @@
"outputs": [],
"source": [
"# add texts and fill vector db\n",
"\n",
"from langchain_community.vectorstores import InfinispanVS\n",
"\n",
"ispnvs = InfinispanVS.from_texts(texts, hf, metas)"
"keys = ispnvs.add_texts(texts, metas)"
]
},
{
@@ -289,6 +361,18 @@
"print_docs(ispnvs.similarity_search(\"How to stay young\", 5))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "862e4af2-9f8a-4985-90cb-997477901b1e",
"metadata": {},
"outputs": [],
"source": [
"# Clean up\n",
"ispnvs.schema_delete()\n",
"ispnvs.cache_delete()"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -316,7 +400,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.8.18"
}
},
"nbformat": 4,
@@ -30,7 +30,7 @@
"\n",
"* `content` of type \"Text\". This is used to store the `Document.pageContent` values.\n",
"* `embedding` of type \"Vector\". Use the dimension used by the model you plan to use. In this notebook we use OpenAI embeddings, which have 1536 dimensions.\n",
"* `source` of type \"Text\". This is used as a metadata column by this example.\n",
"* `search` of type \"Text\". This is used as a metadata column by this example.\n",
"* any other columns you want to use as metadata. They are populated from the `Document.metadata` object. For example, if in the `Document.metadata` object you have a `title` property, you can create a `title` column in the table and it will be populated.\n"
]
},
@@ -10,7 +10,7 @@
"Splits the text based on semantic similarity.\n",
"\n",
"Taken from Greg Kamradt's wonderful notebook:\n",
"[5_Levels_Of_Text_Splitting](https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/5_Levels_Of_Text_Splitting.ipynb)\n",
"https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/5_Levels_Of_Text_Splitting.ipynb\n",
"\n",
"All credit to him.\n",
"\n",
@@ -49,14 +49,6 @@
"from langchain_text_splitters import CharacterTextSplitter"
]
},
{
"cell_type": "markdown",
"id": "a3ba1d8a",
"metadata": {},
"source": [
"The `.from_tiktoken_encoder()` method takes either `encoding` as an argument (e.g. `cl100k_base`), or the `model_name` (e.g. `gpt-4`). All additional arguments like `chunk_size`, `chunk_overlap`, and `separators` are used to instantiate `CharacterTextSplitter`:"
]
},
{
"cell_type": "code",
"execution_count": 2,
@@ -65,7 +57,7 @@
"outputs": [],
"source": [
"text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n",
"    encoding=\"cl100k_base\", chunk_size=100, chunk_overlap=0\n",
"    chunk_size=100, chunk_overlap=0\n",
")\n",
"texts = text_splitter.split_text(state_of_the_union)"
]
@@ -99,31 +91,9 @@
"id": "de5b6a6e",
"metadata": {},
"source": [
"Note that if we use `CharacterTextSplitter.from_tiktoken_encoder`, text is only split by `CharacterTextSplitter` and `tiktoken` tokenizer is used to merge splits. It means that split can be larger than chunk size measured by `tiktoken` tokenizer. We can use `RecursiveCharacterTextSplitter.from_tiktoken_encoder` to make sure splits are not larger than chunk size of tokens allowed by the language model, where each split will be recursively split if it has a larger size:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0262a991",
"metadata": {},
"outputs": [],
"source": [
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"Note that if we use `CharacterTextSplitter.from_tiktoken_encoder`, text is only split by `CharacterTextSplitter` and `tiktoken` tokenizer is used to merge splits. It means that split can be larger than chunk size measured by `tiktoken` tokenizer. We can use `RecursiveCharacterTextSplitter.from_tiktoken_encoder` to make sure splits are not larger than chunk size of tokens allowed by the language model, where each split will be recursively split if it has a larger size.\n",
"\n",
"text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(\n",
"    model_name=\"gpt-4\",\n",
"    chunk_size=100,\n",
"    chunk_overlap=0,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "04457e3a",
"metadata": {},
"source": [
"We can also load a tiktoken splitter directly, which will ensure each split is smaller than chunk size."
"We can also load a tiktoken splitter directly, which ensure each split is smaller than chunk size."
]
},
{
@@ -141,14 +111,6 @@
"print(texts[0])"
]
},
{
"cell_type": "markdown",
"id": "3bc155d0",
"metadata": {},
"source": [
"Some written languages (e.g. Chinese and Japanese) have characters which encode to 2 or more tokens. Using the `TokenTextSplitter` directly can split the tokens for a character between two chunks causing malformed Unicode characters. Use `RecursiveCharacterTextSplitter.from_tiktoken_encoder` or `CharacterTextSplitter.from_tiktoken_encoder` to ensure chunks contain valid Unicode strings."
]
},
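Editor's note: a small sketch of the safe pattern described in the cell above (the model name and sample Japanese text are illustrative; `tiktoken` must be installed):

```python
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitting on character boundaries first keeps multi-token CJK characters
# intact, while tiktoken is only used to measure the size of each chunk.
splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    model_name="gpt-4", chunk_size=10, chunk_overlap=0
)
print(splitter.split_text("こんにちは。今日は良い天気ですね。"))
```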
{
"cell_type": "markdown",
"id": "55f95f06",
@@ -60,7 +60,7 @@
"   * document addition by id (`add_documents` method with `ids` argument)\n",
"   * delete by id (`delete` method with `ids` argument)\n",
"\n",
"Compatible Vectorstores: `AnalyticDB`, `AstraDB`, `AwaDB`, `Bagel`, `Cassandra`, `Chroma`, `CouchbaseVectorStore`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `Milvus`, `MyScale`, `OpenSearchVectorSearch`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `Rockset`, `ScaNN`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `Vald`, `Vearch`, `VespaStore`, `Weaviate`, `ZepVectorStore`.\n",
"Compatible Vectorstores: `AnalyticDB`, `AstraDB`, `AwaDB`, `Bagel`, `Cassandra`, `Chroma`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `Milvus`, `MyScale`, `OpenSearchVectorSearch`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `Rockset`, `ScaNN`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `Vald`, `Vearch`, `VespaStore`, `Weaviate`, `ZepVectorStore`.\n",
"  \n",
"## Caution\n",
"\n",
@@ -85,7 +85,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "15f7263e-c82e-4914-874f-9699ea4de93e",
"metadata": {},
"outputs": [],
@@ -192,7 +192,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 3,
"id": "67d2a5c8-f2bd-489a-b58e-2c7ba7fefe6f",
"metadata": {},
"outputs": [],
@@ -724,7 +724,7 @@
{
"data": {
"text/plain": [
"{'num_added': 2, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 2}"
"{'num_added': 0, 'num_updated': 0, 'num_skipped': 2, 'num_deleted': 2}"
]
},
"execution_count": 30,
@@ -751,9 +751,7 @@
{
"data": {
"text/plain": [
"[Document(page_content='woof woof', metadata={'source': 'doggy.txt'}),\n",
" Document(page_content='woof woof woof', metadata={'source': 'doggy.txt'}),\n",
" Document(page_content='tty kitty', metadata={'source': 'kitty.txt'}),\n",
"[Document(page_content='tty kitty', metadata={'source': 'kitty.txt'}),\n",
" Document(page_content='tty kitty ki', metadata={'source': 'kitty.txt'}),\n",
" Document(page_content='kitty kit', metadata={'source': 'kitty.txt'})]"
]
@@ -906,7 +904,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
"version": "3.9.1"
}
},
"nbformat": 4,
@@ -22,11 +22,10 @@
"Caching embeddings can be done using a `CacheBackedEmbeddings`. The cache backed embedder is a wrapper around an embedder that caches\n",
"embeddings in a key-value store. The text is hashed and the hash is used as the key in the cache.\n",
"\n",
"The main supported way to initialize a `CacheBackedEmbeddings` is `from_bytes_store`. It takes the following parameters:\n",
"The main supported way to initialized a `CacheBackedEmbeddings` is `from_bytes_store`. This takes in the following parameters:\n",
"\n",
"- underlying_embedder: The embedder to use for embedding.\n",
"- document_embedding_cache: Any [`ByteStore`](/docs/integrations/stores/) for caching document embeddings.\n",
"- batch_size: (optional, defaults to `None`) The number of documents to embed between store updates.\n",
"- namespace: (optional, defaults to `\"\"`) The namespace to use for document cache. This namespace is used to avoid collisions with other caches. For example, set it to the name of the embedding model used.\n",
"\n",
"**Attention**: Be sure to set the `namespace` parameter to avoid collisions of the same text embedded using different embeddings models."
@@ -12,7 +12,7 @@ etc., have a function-calling
their arguments, and have the model return a JSON object with a function
to invoke and the inputs to that function. Function-calling is extremely
useful for building [tool-using chains and
agents](../../../../docs/use_cases/tool_use/), and for getting
agents](/docs/use_cases/tool_use/), and for getting
structured outputs from models more generally.
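
To make that first point concrete, here is a sketch of turning a Pydantic class into the JSON schema that function-calling models accept. `convert_to_openai_tool` is the converter this guide uses later; the `Multiply` class is just an illustrative stand-in:

```python
import json

from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.utils.function_calling import convert_to_openai_tool


class Multiply(BaseModel):
    """Multiply two integers together."""

    a: int = Field(..., description="First integer")
    b: int = Field(..., description="Second integer")


# Produces {"type": "function", "function": {"name": "Multiply", ...}},
# the shape expected by function-calling chat model APIs.
print(json.dumps(convert_to_openai_tool(Multiply), indent=2))
```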
|
||||
|
||||
LangChain comes with a number of utilities to make function-calling
|
||||
@@ -28,9 +28,9 @@ easy. Namely, it comes with:
|
||||
|
||||
We’ll focus here on the first two points. For a detailed guide on output
|
||||
parsing check out the [OpenAI Tools output
|
||||
parsers](../../../../docs/modules/model_io/output_parsers/types/openai_tools)
|
||||
parsers](/docs/modules/model_io/output_parsers/types/openai_tools)
|
||||
and to see the structured output chains check out the [Structured output
|
||||
guide](../../../../docs/guides/structured_output).
|
||||
guide](/docs/guides/structured_output).
|
||||
|
||||
Before getting started make sure you have `langchain-core` installed.
|
||||
|
||||
@@ -523,13 +523,13 @@ print(json.dumps(convert_to_openai_tool(Multiply()), indent=2))
|
||||
## Next steps
|
||||
|
||||
- **Output parsing**: See [OpenAI Tools output
|
||||
parsers](../../../../docs/modules/model_io/output_parsers/types/openai_tools)
|
||||
parsers](/docs/modules/model_io/output_parsers/types/openai_tools)
|
||||
and [OpenAI Functions output
|
||||
parsers](../../../../docs/modules/model_io/output_parsers/types/openai_functions)
|
||||
parsers](/docs/modules/model_io/output_parsers/types/openai_functions)
|
||||
to learn about extracting the function calling API responses into
|
||||
various formats.
|
||||
- **Structured output chains**: [Some models have constructors](../../../../docs/guides/structured_output) that
|
||||
- **Structured output chains**: [Some models have constructors](/docs/guides/structured_output) that
|
||||
handle creating a structured output chain for you.
|
||||
- **Tool use**: See how to construct chains and agents that actually
|
||||
call the invoked tools in [these
|
||||
guides](../../../../docs/use_cases/tool_use/).
|
||||
guides](/docs/use_cases/tool_use/).
|
||||
|
||||
@@ -24,7 +24,7 @@ they take a list of chat messages as input and they return an AI message as outp
|
||||
|
||||
These two API types have pretty different input and output schemas. This means that best way to interact with them may be quite different. Although LangChain makes it possible to treat them interchangeably, that doesn't mean you **should**. In particular, the prompting strategies for LLMs vs ChatModels may be quite different. This means that you will want to make sure the prompt you are using is designed for the model type you are working with.
|
||||
|
||||

Additionally, not all models are the same. Different models have different prompting strategies that work best for them. For example, Anthropic's models work best with XML while OpenAI's work best with JSON. This means that the prompt you use for one model may not transfer to other ones. LangChain provides a lot of default prompts, however these are not guaranteed to work well with the model you are using. Historically speaking, most prompts work well with OpenAI but are not heavily tested on other models. This is something we are working to address, but it is something you should keep in mind.

## Messages

@@ -68,11 +68,11 @@ ChatModels and LLMs take different input types. PromptValue is a class designed

### PromptTemplate

[This](/docs/modules/model_io/prompts/quick_start#prompttemplate) is an example of a prompt template. This consists of a template string. This string is then formatted with user inputs to produce a final string.
This is an example of a prompt template. This consists of a template string. This string is then formatted with user inputs to produce a final string.
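
A minimal sketch of that flow, using `PromptTemplate` from `langchain-core` (the variable names and template text are illustrative):

```python
from langchain_core.prompts import PromptTemplate

prompt_template = PromptTemplate.from_template("Tell me a {adjective} joke about {topic}.")

# Formatting fills in the template variables and produces the final string.
final_string = prompt_template.format(adjective="funny", topic="bears")
```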

### MessagePromptTemplate

[This](/docs/modules/model_io/prompts/message_prompts) is an example of a prompt template. This consists of a template **message** - meaning a specific role and a PromptTemplate. This PromptTemplate is then formatted with user inputs to produce a final string that becomes the `content` of this message.
This is an example of a prompt template. This consists of a template **message** - meaning a specific role and a PromptTemplate. This PromptTemplate is then formatted with user inputs to produce a final string that becomes the `content` of this message.

#### HumanMessagePromptTemplate
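
For illustration, a minimal sketch of a human message prompt template, which pairs the human role with a `PromptTemplate` (the template text is an assumption, not taken from the page):

```python
from langchain_core.prompts import HumanMessagePromptTemplate

msg_template = HumanMessagePromptTemplate.from_template("Summarize this text: {text}")

# Formatting returns a HumanMessage whose `content` is the final string.
message = msg_template.format(text="LangChain is a framework for building LLM apps.")
```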

@@ -92,7 +92,7 @@ Oftentimes inputs to prompts can be a list of messages. This is when you would u

### ChatPromptTemplate

[This](/docs/modules/model_io/prompts/quick_start#chatprompttemplate) is an example of a prompt template. This consists of a list of MessagePromptTemplates or MessagePlaceholders. These are then formatted with user inputs to produce a final list of messages.
This is an example of a prompt template. This consists of a list of MessagePromptTemplates or MessagePlaceholders. These are then formatted with user inputs to produce a final list of messages.
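
A minimal sketch (the message contents are illustrative):

```python
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

chat_template = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{question}"),
])

# Formatting produces the final list of messages.
messages = chat_template.format_messages(history=[], question="What is LangChain?")
```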

## Output Parsers

@@ -20,7 +20,7 @@ complexity.
## Architecture

We’ll create a typical RAG application as outlined in the [Q&A
introduction](../../../docs/use_cases/question_answering/), which has
introduction](/docs/use_cases/question_answering/), which has
two main components:

**Indexing**: a pipeline for ingesting data from a source and indexing
@@ -35,28 +35,28 @@ The full sequence from raw data to answer will look like:
#### Indexing

1. **Load**: First we need to load our data. We’ll use
[DocumentLoaders](../../../docs/modules/data_connection/document_loaders/)
[DocumentLoaders](/docs/modules/data_connection/document_loaders/)
for this.
2. **Split**: [Text
splitters](../../../docs/modules/data_connection/document_transformers/)
splitters](/docs/modules/data_connection/document_transformers/)
break large `Documents` into smaller chunks. This is useful both for
indexing data and for passing it in to a model, since large chunks
are harder to search over and won’t fit in a model’s finite context
window.
3. **Store**: We need somewhere to store and index our splits, so that
they can later be searched over. This is often done using a
[VectorStore](../../../docs/modules/data_connection/vectorstores/)
[VectorStore](/docs/modules/data_connection/vectorstores/)
and
[Embeddings](../../../docs/modules/data_connection/text_embedding/)
[Embeddings](/docs/modules/data_connection/text_embedding/)
model.

#### Retrieval and generation

1. **Retrieve**: Given a user input, relevant splits are retrieved from
storage using a
[Retriever](../../../docs/modules/data_connection/retrievers/).
2. **Generate**: A [ChatModel](../../../docs/modules/model_io/chat/) /
[LLM](../../../docs/modules/model_io/llms/) produces an answer using
[Retriever](/docs/modules/data_connection/retrievers/).
2. **Generate**: A [ChatModel](/docs/modules/model_io/chat/) /
[LLM](/docs/modules/model_io/llms/) produces an answer using
a prompt that includes the question and the retrieved data

## Setup
@@ -65,11 +65,11 @@ The full sequence from raw data to answer will look like:

We’ll use an OpenAI chat model and embeddings and a Chroma vector store
in this walkthrough, but everything shown here works with any
[ChatModel](../../../docs/modules/model_io/chat/) or
[LLM](../../../docs/modules/model_io/llms/),
[Embeddings](../../../docs/modules/data_connection/text_embedding/), and
[VectorStore](../../../docs/modules/data_connection/vectorstores/) or
[Retriever](../../../docs/modules/data_connection/retrievers/).
[ChatModel](/docs/modules/model_io/chat/) or
[LLM](/docs/modules/model_io/llms/),
[Embeddings](/docs/modules/data_connection/text_embedding/), and
[VectorStore](/docs/modules/data_connection/vectorstores/) or
[Retriever](/docs/modules/data_connection/retrievers/).

We’ll use the following packages:

@@ -189,7 +189,7 @@ going on.
## 1. Indexing: Load {#indexing-load}

We need to first load the blog post contents. We can use
[DocumentLoaders](../../../docs/modules/data_connection/document_loaders/)
[DocumentLoaders](/docs/modules/data_connection/document_loaders/)
for this, which are objects that load in data from a source and return a
list of
[Documents](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html).
@@ -197,7 +197,7 @@ A `Document` is an object with some `page_content` (str) and `metadata`
(dict).
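
For reference, a `Document` can also be constructed directly; a minimal sketch with illustrative values:

```python
from langchain_core.documents import Document

doc = Document(
    page_content="The text of the blog post.",
    metadata={"source": "https://example.com/post"},
)
```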

In this case we’ll use the
[WebBaseLoader](../../../docs/integrations/document_loaders/web_base),
[WebBaseLoader](/docs/integrations/document_loaders/web_base),
which uses `urllib` to load HTML from web URLs and `BeautifulSoup` to
parse it to text. We can customize the HTML -\> text parsing by passing
in parameters to the `BeautifulSoup` parser via `bs_kwargs` (see
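
A minimal sketch of that loading step; the URL and CSS class names here are placeholders, not the ones used later in this walkthrough:

```python
import bs4
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    web_paths=("https://example.com/blog-post",),
    # Keep only the parts of the page we care about while parsing.
    bs_kwargs={"parse_only": bs4.SoupStrainer(class_=("post-title", "post-content"))},
)
docs = loader.load()
```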

@@ -249,11 +249,11 @@ In

`DocumentLoader`: Object that loads data from a source as a list of
`Documents`.
- [Docs](../../../docs/modules/data_connection/document_loaders/):
- [Docs](/docs/modules/data_connection/document_loaders/):
Detailed documentation on how to use `DocumentLoaders`.
- [Integrations](../../../docs/integrations/document_loaders/): 160+
- [Integrations](/docs/integrations/document_loaders/): 160+
integrations to choose from.
- [Interface](https://api.python.langchain.com/en/latest/document_loaders/langchain_core.document_loaders.base.BaseLoader.html):
- [Interface](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.base.BaseLoader.html):
API reference for the base interface.

## 2. Indexing: Split {#indexing-split}
@@ -271,7 +271,7 @@ In this case we’ll split our documents into chunks of 1000 characters
with 200 characters of overlap between chunks. The overlap helps
mitigate the possibility of separating a statement from important
context related to it. We use the
[RecursiveCharacterTextSplitter](../../../docs/modules/data_connection/document_transformers/recursive_text_splitter),
[RecursiveCharacterTextSplitter](/docs/modules/data_connection/document_transformers/recursive_text_splitter),
which will recursively split the document using common separators like
new lines until each chunk is the appropriate size. This is the
recommended text splitter for generic text use cases.
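
A minimal sketch of the splitting step with the parameters described above (assuming `docs` holds the loaded documents):

```python
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # characters per chunk
    chunk_overlap=200,  # characters shared between consecutive chunks
)
all_splits = text_splitter.split_documents(docs)
```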

@@ -321,15 +321,15 @@ all_splits[10].metadata
chunks. Subclass of `DocumentTransformer`s.
- Explore `Context-aware splitters`, which keep the location (“context”) of each
split in the original `Document`: - [Markdown
files](../../../docs/modules/data_connection/document_transformers/markdown_header_metadata)
- [Code (py or js)](../../../docs/integrations/document_loaders/source_code)
- [Scientific papers](../../../docs/integrations/document_loaders/grobid)
- [Interface](https://api.python.langchain.com/en/latest/base/langchain_text_splitters.base.TextSplitter.html): API reference for the base interface.
files](/docs/modules/data_connection/document_transformers/markdown_header_metadata)
- [Code (py or js)](/docs/integrations/document_loaders/source_code)
- [Scientific papers](/docs/integrations/document_loaders/grobid)
- [Interface](https://api.python.langchain.com/en/latest/text_splitter/langchain_text_splitters.TextSplitter.html): API reference for the base interface.

`DocumentTransformer`: Object that performs a transformation on a list
of `Document`s.
- [Docs](../../../docs/modules/data_connection/document_transformers/): Detailed documentation on how to use `DocumentTransformers`
- [Integrations](../../../docs/integrations/document_transformers/)
- [Docs](/docs/modules/data_connection/document_transformers/): Detailed documentation on how to use `DocumentTransformers`
- [Integrations](/docs/integrations/document_transformers/)
- [Interface](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.transformers.BaseDocumentTransformer.html): API reference for the base interface.

## 3. Indexing: Store {#indexing-store}
@@ -345,9 +345,9 @@ similarity — we measure the cosine of the angle between each pair of
embeddings (which are high dimensional vectors).

We can embed and store all of our document splits in a single command
using the [Chroma](../../../docs/integrations/vectorstores/chroma)
using the [Chroma](/docs/integrations/vectorstores/chroma)
vector store and
[OpenAIEmbeddings](../../../docs/integrations/text_embedding/openai)
[OpenAIEmbeddings](/docs/integrations/text_embedding/openai)
model.

```python
vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())
```
@@ -361,14 +361,14 @@ vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbedd

`Embeddings`: Wrapper around a text embedding model, used for converting
text to embeddings.
- [Docs](../../../docs/modules/data_connection/text_embedding): Detailed documentation on how to use embeddings.
- [Integrations](../../../docs/integrations/text_embedding/): 30+ integrations to choose from.
- [Docs](/docs/modules/data_connection/text_embedding): Detailed documentation on how to use embeddings.
- [Integrations](/docs/integrations/text_embedding/): 30+ integrations to choose from.
- [Interface](https://api.python.langchain.com/en/latest/embeddings/langchain_core.embeddings.Embeddings.html): API reference for the base interface.

`VectorStore`: Wrapper around a vector database, used for storing and
querying embeddings.
- [Docs](../../../docs/modules/data_connection/vectorstores/): Detailed documentation on how to use vector stores.
- [Integrations](../../../docs/integrations/vectorstores/): 40+ integrations to choose from.
- [Docs](/docs/modules/data_connection/vectorstores/): Detailed documentation on how to use vector stores.
- [Integrations](/docs/integrations/vectorstores/): 40+ integrations to choose from.
- [Interface](https://api.python.langchain.com/en/latest/vectorstores/langchain_core.vectorstores.VectorStore.html): API reference for the base interface.

This completes the **Indexing** portion of the pipeline. At this point
@@ -385,12 +385,12 @@ a model, and returns an answer.

First we need to define our logic for searching over documents.
LangChain defines a
[Retriever](../../../docs/modules/data_connection/retrievers/) interface
[Retriever](/docs/modules/data_connection/retrievers/) interface
which wraps an index that can return relevant `Documents` given a string
query.

The most common type of `Retriever` is the
[VectorStoreRetriever](../../../docs/modules/data_connection/retrievers/vectorstore),
[VectorStoreRetriever](/docs/modules/data_connection/retrievers/vectorstore),
which uses the similarity search capabilities of a vector store to
facilitate retrieval. Any `VectorStore` can easily be turned into a
`Retriever` with `VectorStore.as_retriever()`:
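
A minimal sketch (assuming the `vectorstore` built in the Store step; the query string is illustrative):

```python
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

retrieved_docs = retriever.invoke("An example question about the indexed content")
```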

@@ -429,15 +429,15 @@ to do retrieval, too.

`Retriever`: An object that returns `Document`s given a text query

- [Docs](../../../docs/modules/data_connection/retrievers/): Further
- [Docs](/docs/modules/data_connection/retrievers/): Further
documentation on the interface and built-in retrieval techniques.
Some of which include:
- `MultiQueryRetriever` [generates variants of the input
question](../../../docs/modules/data_connection/retrievers/MultiQueryRetriever)
question](/docs/modules/data_connection/retrievers/MultiQueryRetriever)
to improve retrieval hit rate.
- `MultiVectorRetriever` (diagram below) instead generates
[variants of the
embeddings](../../../docs/modules/data_connection/retrievers/multi_vector),
embeddings](/docs/modules/data_connection/retrievers/multi_vector),
also in order to improve retrieval hit rate.
- `Max marginal relevance` selects for [relevance and
diversity](https://www.cs.cmu.edu/~jgc/publication/The_Use_MMR_Diversity_Based_LTMIR_1998.pdf)
@@ -445,8 +445,8 @@ to do retrieval, too.
context.
- Documents can be filtered during vector store retrieval using
metadata filters, such as with a [Self Query
Retriever](../../../docs/modules/data_connection/retrievers/self_query).
- [Integrations](../../../docs/integrations/retrievers/): Integrations
Retriever](/docs/modules/data_connection/retrievers/self_query).
- [Integrations](/docs/integrations/retrievers/): Integrations
with retrieval services.
- [Interface](https://api.python.langchain.com/en/latest/retrievers/langchain_core.retrievers.BaseRetriever.html):
API reference for the base interface.
@@ -521,7 +521,7 @@ Context: filler context
Answer:
```

We’ll use the [LCEL Runnable](../../../docs/expression_language/)
We’ll use the [LCEL Runnable](/docs/expression_language/)
protocol to define the chain, allowing us to
- pipe together components and functions in a transparent way
- automatically trace our chain in LangSmith
- get streaming, async, and batched calling out of the box
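
A minimal sketch of such a chain (assuming the `retriever`, a `prompt`, and an `llm` from the surrounding walkthrough):

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    # Join the retrieved documents into a single context string.
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

answer = rag_chain.invoke("An example question about the indexed content")
```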

@@ -562,17 +562,17 @@ trace](https://smith.langchain.com/public/1799e8db-8a6d-4eb2-84d5-46e8d7d5a99b/r

`ChatModel`: An LLM-backed chat model. Takes in a sequence of messages
and returns a message.
- [Docs](../../../docs/modules/model_io/chat/)
- [Integrations](../../../docs/integrations/chat/): 25+ integrations to choose from.
- [Docs](/docs/modules/model_io/chat/)
- [Integrations](/docs/integrations/chat/): 25+ integrations to choose from.
- [Interface](https://api.python.langchain.com/en/latest/language_models/langchain_core.language_models.chat_models.BaseChatModel.html): API reference for the base interface.

`LLM`: A text-in-text-out LLM. Takes in a string and returns a string.
- [Docs](../../../docs/modules/model_io/llms)
- [Integrations](../../../docs/integrations/llms): 75+ integrations to choose from.
- [Docs](/docs/modules/model_io/llms)
- [Integrations](/docs/integrations/llms): 75+ integrations to choose from.
- [Interface](https://api.python.langchain.com/en/latest/language_models/langchain_core.language_models.llms.BaseLLM.html): API reference for the base interface.

See a guide on RAG with locally-running models
[here](../../../docs/use_cases/question_answering/local_retrieval_qa).
[here](/docs/use_cases/question_answering/local_retrieval_qa).

#### Customizing the prompt

@@ -620,10 +620,10 @@ the above sections. Along from the **Go deeper** sources mentioned
above, good next steps include:

- [Return
sources](../../../docs/use_cases/question_answering/sources): Learn
sources](/docs/use_cases/question_answering/sources): Learn
how to return source documents
- [Streaming](../../../docs/use_cases/question_answering/streaming):
- [Streaming](/docs/use_cases/question_answering/streaming):
Learn how to stream outputs and intermediate steps
- [Add chat
history](../../../docs/use_cases/question_answering/chat_history):
history](/docs/use_cases/question_answering/chat_history):
Learn how to add chat history to your app

@@ -82,6 +82,12 @@ const config = {
        ({
          docs: {
            sidebarPath: require.resolve("./sidebars.js"),
            lastVersion: "current",
            versions: {
              current: {
                label: '0.2.x',
              }
            },
            remarkPlugins: [
              [require("@docusaurus/remark-plugin-npm2yarn"), { sync: true }],
            ],
@@ -149,9 +155,10 @@ const config = {
      logo: {src: "img/brand/wordmark.png", srcDark: "img/brand/wordmark-dark.png"},
      items: [
        {
          to: "/docs/get_started/introduction",
          label: "Docs",
          position: "left",
          type: 'doc',
          docId: 'get_started/introduction',
          label: 'Docs',
          position: 'left',
        },
        {
          type: "docSidebar",
@@ -182,11 +189,13 @@ const config = {
        position: "left",
        items: [
          {
            to: "/docs/people/",
            type: 'doc',
            docId: 'people',
            label: "People",
          },
          {
            to: "/docs/packages",
            docId: "packages",
            type: 'doc',
            label: "Versioning",
          },
          {
@@ -195,7 +204,8 @@ const config = {
            label: "Changelog",
          },
          {
            to: "/docs/contributing",
            docId: "contributing/index",
            type: 'doc',
            label: "Contributing",
          },
          {
@@ -208,15 +218,21 @@ const config = {
            href: "https://github.com/langchain-ai/langchain/blob/master/cookbook/README.md"
          },
          {
            to: "/docs/additional_resources/tutorials",
            docId: "additional_resources/tutorials",
            type: "doc",
            label: "Tutorials"
          },
          {
            to: "/docs/additional_resources/youtube",
            docId: "additional_resources/youtube",
            type: "doc",
            label: "YouTube"
          },
        ]
      },
      {
        type: 'docsVersionDropdown',
        position: 'right'
      },
      {
        type: "dropdown",
        label: "🦜️🔗",
@@ -234,10 +250,6 @@ const config = {
        href: "https://github.com/langchain-ai/langserve",
        label: "LangServe GitHub",
      },
      {
        href: "https://github.com/langchain-ai/langchain/tree/master/templates",
        label: "Templates GitHub",
      },
      {
        label: "Templates Hub",
        href: "https://templates.langchain.com",

@@ -18,8 +18,7 @@
    "format": "prettier --write \"**/*.{js,jsx,ts,tsx,md,mdx}\"",
    "format:check": "prettier --check \"**/*.{js,jsx,ts,tsx,md,mdx}\"",
    "gen": "yarn gen:supabase",
    "gen:supabase": "npx supabase gen types typescript --project-id 'xsqpnijvmbodcxyapnyq' --schema public > ./src/supabase.d.ts",
    "check-broken-links": "bash vercel_build.sh && node ./scripts/check-broken-links.js"
    "gen:supabase": "npx supabase gen types typescript --project-id 'xsqpnijvmbodcxyapnyq' --schema public > ./src/supabase.d.ts"
  },
  "dependencies": {
    "@docusaurus/core": "2.4.3",
@@ -39,7 +38,6 @@
  },
  "devDependencies": {
    "@babel/eslint-parser": "^7.18.2",
    "@langchain/scripts": "^0.0.10",
    "docusaurus-plugin-typedoc": "next",
    "dotenv": "^16.4.5",
    "eslint": "^8.19.0",

@@ -1,7 +0,0 @@
// Sorry py folks, gotta be js for this one
const { checkBrokenLinks } = require("@langchain/scripts/check_broken_links");

checkBrokenLinks("docs", {
  timeout: 10000,
  retryFailed: true,
});
@@ -1,103 +0,0 @@
/* eslint-disable react/jsx-props-no-spreading */
import React from "react";
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import CodeBlock from "@theme-original/CodeBlock";

function Setup({ apiKeyName, packageName }) {
  const apiKeyText = `import getpass
import os

os.environ["${apiKeyName}"] = getpass.getpass()`;
  return (
    <>
      <h5>Install dependencies</h5>
      <CodeBlock language="bash">{`pip install -qU ${packageName}`}</CodeBlock>
      <h5>Set environment variables</h5>
      <CodeBlock language="python">{apiKeyText}</CodeBlock>
    </>
  );
}

/**
 * @param {{ openaiParams?: string, anthropicParams?: string, fireworksParams?: string, mistralParams?: string, googleParams?: string, hideOpenai?: boolean, hideAnthropic?: boolean, hideFireworks?: boolean, hideMistral?: boolean, hideGoogle?: boolean }} props
 */
export default function ChatModelTabs(props) {
  const {
    openaiParams,
    anthropicParams,
    fireworksParams,
    mistralParams,
    googleParams,
    hideOpenai,
    hideAnthropic,
    hideFireworks,
    hideMistral,
    hideGoogle,
  } = props;

  const openAIParamsOrDefault = openaiParams ?? `model="gpt-3.5-turbo-0125"`
  const anthropicParamsOrDefault = anthropicParams ?? `model="claude-3-sonnet-20240229"`
  const fireworksParamsOrDefault = fireworksParams ?? `model="accounts/fireworks/models/mixtral-8x7b-instruct"`
  const mistralParamsOrDefault = mistralParams ?? `model="mistral-large-latest"`
  const googleParamsOrDefault = googleParams ?? `model="gemini-pro"`

  const tabItems = [
    {
      value: "OpenAI",
      label: "OpenAI",
      text: `from langchain_openai import ChatOpenAI\n\nmodel = ChatOpenAI(${openAIParamsOrDefault})`,
      apiKeyName: "OPENAI_API_KEY",
      packageName: "langchain-openai",
      default: true,
      shouldHide: hideOpenai,
    },
    {
      value: "Anthropic",
      label: "Anthropic",
      text: `from langchain_anthropic import ChatAnthropic\n\nmodel = ChatAnthropic(${anthropicParamsOrDefault})`,
      apiKeyName: "ANTHROPIC_API_KEY",
      packageName: "langchain-anthropic",
      default: false,
      shouldHide: hideAnthropic,
    },
    {
      value: "FireworksAI",
      label: "FireworksAI",
      text: `from langchain_fireworks import ChatFireworks\n\nmodel = ChatFireworks(${fireworksParamsOrDefault})`,
      apiKeyName: "FIREWORKS_API_KEY",
      packageName: "langchain-fireworks",
      default: false,
      shouldHide: hideFireworks,
    },
    {
      value: "MistralAI",
      label: "MistralAI",
      text: `from langchain_mistralai import ChatMistralAI\n\nmodel = ChatMistralAI(${mistralParamsOrDefault})`,
      apiKeyName: "MISTRAL_API_KEY",
      packageName: "langchain-mistralai",
      default: false,
      shouldHide: hideMistral,
    },
    {
      value: "Google",
      label: "Google",
      text: `from langchain_google_genai import ChatGoogleGenerativeAI\n\nmodel = ChatGoogleGenerativeAI(${googleParamsOrDefault})`,
      apiKeyName: "GOOGLE_API_KEY",
      packageName: "langchain-google-genai",
      default: false,
      shouldHide: hideGoogle,
    }
  ]

  return (
    <Tabs groupId="modelTabs">
      {tabItems.filter((tabItem) => !tabItem.shouldHide).map((tabItem) => (
        <TabItem value={tabItem.value} label={tabItem.label} default={tabItem.default}>
          <Setup apiKeyName={tabItem.apiKeyName} packageName={tabItem.packageName} />
          <CodeBlock language="python">{tabItem.text}</CodeBlock>
        </TabItem>
      ))}
    </Tabs>
  );
}
@@ -33,3 +33,4 @@ python3 scripts/resolve_local_links.py docs/langgraph.md https://github.com/lang

# render
quarto render docs/
quarto render versioned_docs/

BIN
docs/versioned_docs/static/img/ApifyActors.png
Normal file
After Width: | Height: | Size: 147 KiB |
BIN
docs/versioned_docs/static/img/HeliconeDashboard.png
Normal file
After Width: | Height: | Size: 56 KiB |
BIN
docs/versioned_docs/static/img/HeliconeKeys.png
Normal file
After Width: | Height: | Size: 148 KiB |
BIN
docs/versioned_docs/static/img/MetalDash.png
Normal file
After Width: | Height: | Size: 193 KiB |
BIN
docs/versioned_docs/static/img/OSS_LLM_overview.png
Normal file
After Width: | Height: | Size: 64 KiB |
BIN
docs/versioned_docs/static/img/ReAct.png
Normal file
After Width: | Height: | Size: 42 KiB |
BIN
docs/versioned_docs/static/img/RemembrallDashboard.png
Normal file
After Width: | Height: | Size: 190 KiB |
BIN
docs/versioned_docs/static/img/SQLDatabaseToolkit.png
Normal file
After Width: | Height: | Size: 121 KiB |
BIN
docs/versioned_docs/static/img/agent.png
Normal file
After Width: | Height: | Size: 168 KiB |
BIN
docs/versioned_docs/static/img/agents_use_case_1.png
Normal file
After Width: | Height: | Size: 52 KiB |
BIN
docs/versioned_docs/static/img/agents_use_case_trace_1.png
Normal file
After Width: | Height: | Size: 74 KiB |
BIN
docs/versioned_docs/static/img/agents_use_case_trace_2.png
Normal file
After Width: | Height: | Size: 166 KiB |
BIN
docs/versioned_docs/static/img/agents_vs_chains.png
Normal file
After Width: | Height: | Size: 42 KiB |
BIN
docs/versioned_docs/static/img/api_chain.png
Normal file
After Width: | Height: | Size: 150 KiB |
BIN
docs/versioned_docs/static/img/api_chain_response.png
Normal file
After Width: | Height: | Size: 167 KiB |
BIN
docs/versioned_docs/static/img/api_function_call.png
Normal file
After Width: | Height: | Size: 98 KiB |
BIN
docs/versioned_docs/static/img/api_use_case.png
Normal file
After Width: | Height: | Size: 117 KiB |
BIN
docs/versioned_docs/static/img/apple-touch-icon.png
Normal file
After Width: | Height: | Size: 16 KiB |
BIN
docs/versioned_docs/static/img/brand/favicon.png
Normal file
After Width: | Height: | Size: 777 B |
BIN
docs/versioned_docs/static/img/brand/theme-image.png
Normal file
After Width: | Height: | Size: 192 KiB |
BIN
docs/versioned_docs/static/img/brand/wordmark-dark.png
Normal file
After Width: | Height: | Size: 20 KiB |
BIN
docs/versioned_docs/static/img/brand/wordmark.png
Normal file
After Width: | Height: | Size: 22 KiB |
BIN
docs/versioned_docs/static/img/chat_use_case.png
Normal file
After Width: | Height: | Size: 93 KiB |
BIN
docs/versioned_docs/static/img/chat_use_case_2.png
Normal file
After Width: | Height: | Size: 102 KiB |
BIN
docs/versioned_docs/static/img/code_retrieval.png
Normal file
After Width: | Height: | Size: 84 KiB |
BIN
docs/versioned_docs/static/img/code_understanding.png
Normal file
After Width: | Height: | Size: 54 KiB |
BIN
docs/versioned_docs/static/img/contextual_compression.jpg
Normal file
After Width: | Height: | Size: 78 KiB |
BIN
docs/versioned_docs/static/img/cpal_diagram.png
Normal file
After Width: | Height: | Size: 116 KiB |
BIN
docs/versioned_docs/static/img/create_sql_query_chain.png
Normal file
After Width: | Height: | Size: 54 KiB |
BIN
docs/versioned_docs/static/img/data_connection.jpg
Normal file
After Width: | Height: | Size: 164 KiB |
BIN
docs/versioned_docs/static/img/extraction.png
Normal file
After Width: | Height: | Size: 125 KiB |
BIN
docs/versioned_docs/static/img/extraction_trace_few_shot.png
Normal file
After Width: | Height: | Size: 325 KiB |
BIN
docs/versioned_docs/static/img/extraction_trace_function.png
Normal file
After Width: | Height: | Size: 131 KiB |
BIN
docs/versioned_docs/static/img/extraction_trace_parsing.png
Normal file
After Width: | Height: | Size: 432 KiB |
BIN
docs/versioned_docs/static/img/extraction_trace_tool.png
Normal file
After Width: | Height: | Size: 336 KiB |
BIN
docs/versioned_docs/static/img/favicon-16x16.png
Normal file
After Width: | Height: | Size: 542 B |
BIN
docs/versioned_docs/static/img/favicon-32x32.png
Normal file
After Width: | Height: | Size: 1.2 KiB |
BIN
docs/versioned_docs/static/img/favicon.ico
Normal file
After Width: | Height: | Size: 15 KiB |
BIN
docs/versioned_docs/static/img/graph_chain.webp
Normal file
After Width: | Height: | Size: 13 KiB |
BIN
docs/versioned_docs/static/img/graph_construction1.png
Normal file
After Width: | Height: | Size: 67 KiB |
BIN
docs/versioned_docs/static/img/graph_construction2.png
Normal file
After Width: | Height: | Size: 59 KiB |
BIN
docs/versioned_docs/static/img/graph_semantic.png
Normal file
After Width: | Height: | Size: 74 KiB |
BIN
docs/versioned_docs/static/img/graph_usecase.png
Normal file
After Width: | Height: | Size: 47 KiB |