Compare commits


1 Commit

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Erick Friis | 4671ad8227 | core: default implementation for embed_documents | 2024-12-05 17:17:02 -08:00 |
162 changed files with 5003 additions and 9988 deletions

View File

@@ -5,7 +5,6 @@ on:
push:
branches: [master]
pull_request:
merge_group:
# If another push to the same PR or branch happens while this workflow is still running,
# cancel the earlier run in favor of the next run.

View File

@@ -38,21 +38,18 @@ conda install langchain -c conda-forge
For these applications, LangChain simplifies the entire application lifecycle:
- **Open-source libraries**: Build your applications using LangChain's open-source
[components](https://python.langchain.com/docs/concepts/) and
[third-party integrations](https://python.langchain.com/docs/integrations/providers/).
- **Open-source libraries**: Build your applications using LangChain's open-source [building blocks](https://python.langchain.com/docs/concepts/#langchain-expression-language-lcel), [components](https://python.langchain.com/docs/concepts/), and [third-party integrations](https://python.langchain.com/docs/integrations/providers/).
Use [LangGraph](https://langchain-ai.github.io/langgraph/) to build stateful agents with first-class streaming and human-in-the-loop support.
- **Productionization**: Inspect, monitor, and evaluate your apps with [LangSmith](https://docs.smith.langchain.com/) so that you can constantly optimize and deploy with confidence.
- **Deployment**: Turn your LangGraph applications into production-ready APIs and Assistants with [LangGraph Platform](https://langchain-ai.github.io/langgraph/cloud/).
- **Deployment**: Turn your LangGraph applications into production-ready APIs and Assistants with [LangGraph Cloud](https://langchain-ai.github.io/langgraph/cloud/).
### Open-source libraries
- **`langchain-core`**: Base abstractions.
- **Integration packages** (e.g. **`langchain-openai`**, **`langchain-anthropic`**, etc.): Important integrations have been split into lightweight packages that are co-maintained by the LangChain team and the integration developers.
- **`langchain-core`**: Base abstractions and LangChain Expression Language.
- **`langchain-community`**: Third party integrations.
- Some integrations have been further split into **partner packages** that only rely on **`langchain-core`**. Examples include **`langchain_openai`** and **`langchain_anthropic`**.
- **`langchain`**: Chains, agents, and retrieval strategies that make up an application's cognitive architecture.
- **`langchain-community`**: Third-party integrations that are community maintained.
- **[LangGraph](https://langchain-ai.github.io/langgraph)**: Build robust and stateful multi-actor applications with LLMs by modeling steps as edges and nodes in a graph. Integrates smoothly with LangChain, but can be used without it. To learn more about LangGraph, check out our first LangChain Academy course, *Introduction to LangGraph*, available [here](https://academy.langchain.com/courses/intro-to-langgraph).
- **[`LangGraph`](https://langchain-ai.github.io/langgraph/)**: A library for building robust and stateful multi-actor applications with LLMs by modeling steps as edges and nodes in a graph. Integrates smoothly with LangChain, but can be used without it. To learn more about LangGraph, check out our first LangChain Academy course, *Introduction to LangGraph*, available [here](https://academy.langchain.com/courses/intro-to-langgraph).
### Productionization:
@@ -60,7 +57,7 @@ For these applications, LangChain simplifies the entire application lifecycle:
### Deployment:
- **[LangGraph Platform](https://langchain-ai.github.io/langgraph/cloud/)**: Turn your LangGraph applications into production-ready APIs and Assistants.
- **[LangGraph Cloud](https://langchain-ai.github.io/langgraph/cloud/)**: Turn your LangGraph applications into production-ready APIs and Assistants.
![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers.](docs/static/svg/langchain_stack_112024.svg#gh-light-mode-only "LangChain Architecture Overview")
![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers.](docs/static/svg/langchain_stack_112024_dark.svg#gh-dark-mode-only "LangChain Architecture Overview")
@@ -88,12 +85,19 @@ And much more! Head to the [Tutorials](https://python.langchain.com/docs/tutoria
The main value props of the LangChain libraries are:
1. **Components**: composable building blocks, tools and integrations for working with language models. Components are modular and easy-to-use, whether you are using the rest of the LangChain framework or not.
2. **Easy orchestration with LangGraph**: [LangGraph](https://langchain-ai.github.io/langgraph/),
built on top of `langchain-core`, has built-in support for [messages](https://python.langchain.com/docs/concepts/messages/), [tools](https://python.langchain.com/docs/concepts/tools/),
and other LangChain abstractions. This makes it easy to combine components into
production-ready applications with persistence, streaming, and other key features.
Check out the LangChain [tutorials page](https://python.langchain.com/docs/tutorials/#orchestration) for examples.
1. **Components**: composable building blocks, tools and integrations for working with language models. Components are modular and easy-to-use, whether you are using the rest of the LangChain framework or not
2. **Off-the-shelf chains**: built-in assemblages of components for accomplishing higher-level tasks
Off-the-shelf chains make it easy to get started. Components make it easy to customize existing chains and build new ones.
## LangChain Expression Language (LCEL)
LCEL is a key part of LangChain, letting you build and organize chains of processing steps in a straightforward, declarative way. It was designed so that prototypes can go directly into production without any code changes, covering everything from a basic "prompt + LLM" chain to intricate, multi-step workflows (see the sketch after the list below).
- **[Overview](https://python.langchain.com/docs/concepts/#langchain-expression-language-lcel)**: LCEL and its benefits
- **[Interface](https://python.langchain.com/docs/concepts/#runnable-interface)**: The standard Runnable interface for LCEL objects
- **[Primitives](https://python.langchain.com/docs/how_to/#langchain-expression-language-lcel)**: More on the primitives LCEL includes
- **[Cheatsheet](https://python.langchain.com/docs/how_to/lcel_cheatsheet/)**: Quick overview of the most common usage patterns
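As a concrete illustration of the declarative style, here is a minimal LCEL sketch. It assumes the `langchain-openai` package is installed and `OPENAI_API_KEY` is set; any other chat model provider composes the same way:
```python
# Minimal LCEL sketch: compose a prompt, a chat model, and an output parser
# into a single runnable chain using the | operator.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI  # assumed provider, used for illustration

prompt = ChatPromptTemplate.from_template("Tell me a joke about {topic}")
chain = prompt | ChatOpenAI(model="gpt-4o-mini") | StrOutputParser()

# The composed chain is itself a Runnable, so it supports invoke, batch, and stream.
print(chain.invoke({"topic": "parrots"}))
```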
## Components
@@ -101,19 +105,15 @@ Components fall into the following **modules**:
**📃 Model I/O**
This includes [prompt management](https://python.langchain.com/docs/concepts/prompt_templates/)
and a generic interface for [chat models](https://python.langchain.com/docs/concepts/chat_models/), including a consistent interface for [tool-calling](https://python.langchain.com/docs/concepts/tool_calling/) and [structured output](https://python.langchain.com/docs/concepts/structured_outputs/) across model providers.
This includes [prompt management](https://python.langchain.com/docs/concepts/#prompt-templates), [prompt optimization](https://python.langchain.com/docs/concepts/#example-selectors), a generic interface for [chat models](https://python.langchain.com/docs/concepts/#chat-models) and [LLMs](https://python.langchain.com/docs/concepts/#llms), and common utilities for working with [model outputs](https://python.langchain.com/docs/concepts/#output-parsers).
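To make the consistent tool-calling interface concrete, here is a small hedged sketch; the `multiply` tool and model name are illustrative, and `bind_tools` works the same way across providers that support tool calling:
```python
# Sketch of the provider-agnostic tool-calling interface.
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI  # assumed provider, used for illustration

@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b

llm_with_tools = ChatOpenAI(model="gpt-4o-mini").bind_tools([multiply])
msg = llm_with_tools.invoke("What is 6 times 7?")
print(msg.tool_calls)  # standardized tool-call dicts, regardless of provider
```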
**📚 Retrieval**
Retrieval Augmented Generation involves [loading data](https://python.langchain.com/docs/concepts/document_loaders/) from a variety of sources, [preparing it](https://python.langchain.com/docs/concepts/text_splitters/), then [searching over (a.k.a. retrieving from)](https://python.langchain.com/docs/concepts/retrievers/) it for use in the generation step.
Retrieval Augmented Generation involves [loading data](https://python.langchain.com/docs/concepts/#document-loaders) from a variety of sources, [preparing it](https://python.langchain.com/docs/concepts/#text-splitters), then [searching over (a.k.a. retrieving from)](https://python.langchain.com/docs/concepts/#retrievers) it for use in the generation step.
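A minimal hedged sketch of that load/prepare/retrieve flow, using the in-memory vector store from `langchain-core` and an assumed embeddings provider:
```python
# Load -> split -> index -> retrieve, end to end.
from langchain_core.documents import Document
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings  # assumed embeddings provider
from langchain_text_splitters import RecursiveCharacterTextSplitter

docs = [Document(page_content="LangChain is a framework for building LLM applications.")]
splits = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=0).split_documents(docs)
store = InMemoryVectorStore.from_documents(splits, OpenAIEmbeddings())
print(store.similarity_search("What is LangChain?", k=1))
```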
**🤖 Agents**
Agents allow an LLM autonomy over how a task is accomplished. Agents make decisions about which Actions to take, then take that Action, observe the result, and repeat until the task is complete. [LangGraph](https://langchain-ai.github.io/langgraph/) makes it easy to use
LangChain components to build both [custom](https://langchain-ai.github.io/langgraph/tutorials/)
and [built-in](https://langchain-ai.github.io/langgraph/how-tos/create-react-agent/)
LLM agents.
Agents allow an LLM autonomy over how a task is accomplished. Agents make decisions about which Actions to take, then take that Action, observe the result, and repeat until the task is complete. LangChain provides a [standard interface for agents](https://python.langchain.com/docs/concepts/#agents), along with [LangGraph](https://github.com/langchain-ai/langgraph) for building custom agents.
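For example, a prebuilt ReAct-style agent takes only a few lines with LangGraph; this is a hedged sketch that assumes the `langgraph` package and reuses an illustrative `multiply` tool:
```python
# Sketch of a prebuilt LangGraph agent that calls tools in a loop until done.
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI  # assumed provider, used for illustration
from langgraph.prebuilt import create_react_agent

@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b

agent = create_react_agent(ChatOpenAI(model="gpt-4o-mini"), tools=[multiply])
result = agent.invoke({"messages": [("user", "What is 6 times 7?")]})
print(result["messages"][-1].content)
```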
## 📖 Documentation

View File

@@ -60,7 +60,6 @@ copy-infra:
cp package.json $(OUTPUT_NEW_DIR)
cp sidebars.js $(OUTPUT_NEW_DIR)
cp -r static $(OUTPUT_NEW_DIR)
cp -r ../libs/cli/langchain_cli/integration_template $(OUTPUT_NEW_DIR)/src/theme
cp yarn.lock $(OUTPUT_NEW_DIR)
render:
@@ -82,7 +81,6 @@ build: install-py-deps generate-files copy-infra render md-sync append-related
vercel-build: install-vercel-deps build generate-references
rm -rf docs
mv $(OUTPUT_NEW_DOCS_DIR) docs
cp -r ../libs/cli/langchain_cli/integration_template src/theme
rm -rf build
mkdir static/api_reference
git clone --depth=1 https://github.com/langchain-ai/langchain-api-docs-html.git

View File

@@ -87,18 +87,6 @@ class Beta(BaseAdmonition):
def setup(app):
app.add_directive("example_links", ExampleLinksDirective)
app.add_directive("beta", Beta)
app.connect("autodoc-skip-member", skip_private_members)
def skip_private_members(app, what, name, obj, skip, options):
if skip:
return True
if hasattr(obj, "__doc__") and obj.__doc__ and ":private:" in obj.__doc__:
return True
if name == "__init__" and obj.__objclass__ is object:
# don't document default init
return True
return None
# -- Project information -----------------------------------------------------

View File

@@ -72,21 +72,14 @@ def _load_module_members(module_path: str, namespace: str) -> ModuleMembers:
Returns:
ModuleMembers: The classes and functions loaded from the module.
"""
classes_: List[ClassInfo] = []
functions: List[FunctionInfo] = []
module = importlib.import_module(module_path)
if ":private:" in (module.__doc__ or ""):
return ModuleMembers(classes_=[], functions=[])
for name, type_ in inspect.getmembers(module):
if not hasattr(type_, "__module__"):
continue
if type_.__module__ != module_path:
continue
if ":private:" in (type_.__doc__ or ""):
continue
if inspect.isclass(type_):
# The type of the class is used to select a template

View File

@@ -65,7 +65,7 @@ A package to deploy LangChain chains as REST APIs. Makes it easy to get a produc
:::important
LangServe is designed to primarily deploy simple Runnables and work with well-known primitives in langchain-core.
If you need a deployment option for LangGraph, you should instead be looking at LangGraph Platform (beta) which will be better suited for deploying LangGraph applications.
If you need a deployment option for LangGraph, you should instead be looking at LangGraph Cloud (beta) which will be better suited for deploying LangGraph applications.
:::
For more information, see the [LangServe documentation](/docs/langserve).

View File

@@ -1,5 +1,4 @@
---
pagination_prev: null
pagination_next: contributing/how_to/integrations/package
---
@@ -12,7 +11,7 @@ LangChain provides standard interfaces for several different components (languag
## Why contribute an integration to LangChain?
- **Discoverability:** LangChain is the most used framework for building LLM applications, with over 20 million monthly downloads. LangChain integrations are discoverable by a large community of GenAI builders.
- **Interoperability:** LangChain components expose a standard interface, allowing developers to easily swap them for each other. If you implement a LangChain integration, any developer using a different component will easily be able to swap yours in.
- **Interoptability:** LangChain components expose a standard interface, allowing developers to easily swap them for each other. If you implement a LangChain integration, any developer using a different component will easily be able to swap yours in.
- **Best Practices:** Through their standard interface, LangChain components encourage and facilitate best practices (streaming, async, etc)
@@ -38,6 +37,7 @@ While any component can be integrated into LangChain, there are specific types o
<li>Chat Models</li>
<li>Tools/Toolkits</li>
<li>Retrievers</li>
<li>Document Loaders</li>
<li>Vector Stores</li>
<li>Embedding Models</li>
</ul>
@@ -45,7 +45,6 @@ While any component can be integrated into LangChain, there are specific types o
<td>
<ul>
<li>LLMs (Text-Completion Models)</li>
<li>Document Loaders</li>
<li>Key-Value Stores</li>
<li>Document Transformers</li>
<li>Model Caches</li>

View File

@@ -12,89 +12,97 @@ which contain classes that are compatible with LangChain's core interfaces.
We will cover:
1. (Optional) How to bootstrap a new integration package
2. How to implement components, such as [chat models](/docs/concepts/chat_models/) and [vector stores](/docs/concepts/vectorstores/), that adhere
1. How to implement components, such as [chat models](/docs/concepts/chat_models/) and [vector stores](/docs/concepts/vectorstores/), that adhere
to the LangChain interface;
2. (Optional) How to bootstrap a new integration package.
## (Optional) bootstrapping a new integration package
## Implementing LangChain components
In this section, we will outline 2 options for bootstrapping a new integration package,
and you're welcome to use other tools if you prefer!
LangChain components are subclasses of base classes in [langchain-core](/docs/concepts/architecture/#langchain-core).
Examples include [chat models](/docs/concepts/chat_models/),
[vector stores](/docs/concepts/vectorstores/), [tools](/docs/concepts/tools/),
[embedding models](/docs/concepts/embedding_models/) and [retrievers](/docs/concepts/retrievers/).
1. **langchain-cli**: This is a command-line tool that can be used to bootstrap a new integration package with a template for LangChain components and Poetry for dependency management.
2. **Poetry**: This is a Python dependency management tool that can be used to bootstrap a new Python package with dependencies. You can then add LangChain components to this package.
Your integration package will typically implement a subclass of at least one of these
components. Expand the tabs below to see details on each.
<details>
<summary>Option 1: langchain-cli (recommended)</summary>
<summary>Chat models</summary>
In this guide, we will be using the `langchain-cli` to create a new integration package
from a template, which can be edited to implement your LangChain components.
Refer to the [Custom Chat Model Guide](/docs/how_to/custom_chat_model) for
detail on a starter chat model [implementation](/docs/how_to/custom_chat_model/#implementation).
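For orientation, below is a minimal hedged sketch of such a chat model; `ChatParrotLink` is the placeholder name used throughout this guide, and a real implementation would call a provider API instead of echoing:
```python
# Minimal, hypothetical chat model that "parrots" the last message back.
from typing import Any, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatResult

class ChatParrotLink(BaseChatModel):
    model: str = "bird-brain-001"

    @property
    def _llm_type(self) -> str:
        return "chat-parrot-link"

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        # A real model would call its API here; we simply echo the last message.
        text = str(messages[-1].content)
        return ChatResult(generations=[ChatGeneration(message=AIMessage(content=text))])
```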
### **Prerequisites**
:::tip
- [GitHub](https://github.com) account
- [PyPi](https://pypi.org/) account
The model from the [Custom Chat Model Guide](/docs/how_to/custom_chat_model) is tested
against the standard unit and integration tests in the LangChain Github repository.
You can also access that implementation directly from Github
[here](https://github.com/langchain-ai/langchain/blob/master/libs/standard-tests/tests/unit_tests/custom_chat_model.py).
### Bootstrapping a new Python package with langchain-cli
First, install `langchain-cli` and `poetry`:
```bash
pip install langchain-cli poetry
```
Next, come up with a name for your package. For this guide, we'll use `langchain-parrot-link`.
You can confirm that the name is available on PyPi by searching for it on the [PyPi website](https://pypi.org/).
Next, create your new Python package with `langchain-cli`, and navigate into the new directory with `cd`:
```bash
langchain-cli integration new
> The name of the integration to create (e.g. `my-integration`): parrot-link
> Name of integration in PascalCase [ParrotLink]:
cd parrot-link
```
Next, let's add any dependencies we need:
```bash
poetry add my-integration-sdk
```
We can also add some `typing` or `test` dependencies in a separate poetry dependency group.
```bash
poetry add --group typing my-typing-dep
poetry add --group test my-test-dep
```
And finally, have poetry set up a virtual environment with your dependencies, as well
as your integration package:
```bash
poetry install --with lint,typing,test,test_integration
```
You now have a new Python package with a template for LangChain components! This
template comes with files for each integration type, and you're welcome to duplicate or
delete any of these files as needed (including the associated test files).
To create any individual files from the template, you can run e.g.:
```bash
langchain-cli integration new \
--name parrot-link \
--name-class ParrotLink \
--src integration_template/chat_models.py \
--dst langchain_parrot_link/chat_models_2.py
```
:::
</details>
<details>
<summary>Option 2: Poetry (manual)</summary>
<summary>Vector stores</summary>
Your vector store implementation will depend on your chosen database technology.
`langchain-core` includes a minimal
[in-memory vector store](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.in_memory.InMemoryVectorStore.html)
that we can use as a guide. You can access the code [here](https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/vectorstores/in_memory.py).
All vector stores must inherit from the [VectorStore](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.base.VectorStore.html)
base class. This interface consists of methods for writing, deleting and searching
for documents in the vector store.
`VectorStore` supports a variety of synchronous and asynchronous search types (e.g.,
nearest-neighbor or maximum marginal relevance), as well as interfaces for adding
documents to the store. See the [API Reference](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.base.VectorStore.html)
for all supported methods. The required methods are tabulated below:
| Method/Property | Description |
|------------------------ |------------------------------------------------------|
| `add_documents` | Add documents to the vector store. |
| `delete` | Delete selected documents from vector store (by IDs) |
| `get_by_ids` | Get selected documents from vector store (by IDs) |
| `similarity_search` | Get documents most similar to a query. |
| `embeddings` (property) | Embeddings object for vector store. |
| `from_texts` | Instantiate vector store via adding texts. |
Note that `InMemoryVectorStore` implements some optional search types, as well as
convenience methods for loading and dumping the object to a file, but this is not
necessary for all implementations.
:::tip
The [in-memory vector store](https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/vectorstores/in_memory.py)
is tested against the standard tests in the LangChain Github repository.
:::
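To see the required interface in action, here is a small hedged usage sketch built on the in-memory reference implementation and the deterministic fake embeddings that `langchain-core` ships for testing:
```python
# Exercise the core VectorStore methods against the reference implementation.
from langchain_core.documents import Document
from langchain_core.embeddings.fake import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))
store.add_documents([Document(id="1", page_content="Polly wants a cracker")])

print(store.similarity_search("cracker", k=1))  # most similar documents
print(store.get_by_ids(["1"]))                  # lookup by ID
store.delete(ids=["1"])                         # removal by ID
```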
</details>
<!-- <details>
<summary>Embeddings</summary>
</details>
<details>
<summary>Tools</summary>
</details>
<details>
<summary>Retrievers</summary>
</details>
<details>
<summary>Document Loaders</summary>
</details> -->
## (Optional) bootstrapping a new integration package
In this guide, we will be using [Poetry](https://python-poetry.org/) for
dependency management and packaging, and you're welcome to use any other tools you prefer.
@@ -175,8 +183,6 @@ later, following the [standard tests](../standard_tests) guide.
For `chat_models.py`, simply paste the contents of the chat model implementation
[above](#implementing-langchain-components).
</details>
### Push your package to a public Github repository
This is only required if you want to publish your integration in the LangChain documentation.
@@ -185,319 +191,6 @@ This is only required if you want to publish your integration in the LangChain d
2. Push your code to the repository.
3. Confirm that your repository is viewable by the public (e.g. in a private browsing window, where you're not logged into Github).
## Implementing LangChain components
LangChain components are subclasses of base classes in [langchain-core](/docs/concepts/architecture/#langchain-core).
Examples include [chat models](/docs/concepts/chat_models/),
[vector stores](/docs/concepts/vectorstores/), [tools](/docs/concepts/tools/),
[embedding models](/docs/concepts/embedding_models/) and [retrievers](/docs/concepts/retrievers/).
Your integration package will typically implement a subclass of at least one of these
components. Expand the tabs below to see details on each.
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import CodeBlock from '@theme/CodeBlock';
<Tabs>
<TabItem value="chat_models" label="Chat models">
Refer to the [Custom Chat Model Guide](/docs/how_to/custom_chat_model) for
detail on a starter chat model [implementation](/docs/how_to/custom_chat_model/#implementation).
You can start from the following template or langchain-cli command:
```bash
langchain-cli integration new \
--name parrot-link \
--name-class ParrotLink \
--src integration_template/chat_models.py \
--dst langchain_parrot_link/chat_models.py
```
<details>
<summary>Example chat model code</summary>
import ChatModelSource from '../../../../src/theme/integration_template/integration_template/chat_models.py';
<CodeBlock language="python" title="langchain_parrot_link/chat_models.py">
{
ChatModelSource.replaceAll('__ModuleName__', 'ParrotLink')
.replaceAll('__package_name__', 'langchain-parrot-link')
.replaceAll('__MODULE_NAME__', 'PARROT_LINK')
.replaceAll('__module_name__', 'langchain_parrot_link')
}
</CodeBlock>
</details>
</TabItem>
<TabItem value="vector_stores" label="Vector stores">
Your vector store implementation will depend on your chosen database technology.
`langchain-core` includes a minimal
[in-memory vector store](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.in_memory.InMemoryVectorStore.html)
that we can use as a guide. You can access the code [here](https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/vectorstores/in_memory.py).
All vector stores must inherit from the [VectorStore](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.base.VectorStore.html)
base class. This interface consists of methods for writing, deleting and searching
for documents in the vector store.
`VectorStore` supports a variety of synchronous and asynchronous search types (e.g.,
nearest-neighbor or maximum marginal relevance), as well as interfaces for adding
documents to the store. See the [API Reference](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.base.VectorStore.html)
for all supported methods. The required methods are tabulated below:
| Method/Property | Description |
|------------------------ |------------------------------------------------------|
| `add_documents` | Add documents to the vector store. |
| `delete` | Delete selected documents from vector store (by IDs) |
| `get_by_ids` | Get selected documents from vector store (by IDs) |
| `similarity_search` | Get documents most similar to a query. |
| `embeddings` (property) | Embeddings object for vector store. |
| `from_texts` | Instantiate vector store via adding texts. |
Note that `InMemoryVectorStore` implements some optional search types, as well as
convenience methods for loading and dumping the object to a file, but this is not
necessary for all implementations.
:::tip
The [in-memory vector store](https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/vectorstores/in_memory.py)
is tested against the standard tests in the LangChain Github repository.
:::
<details>
<summary>Example vector store code</summary>
import VectorstoreSource from '../../../../src/theme/integration_template/integration_template/vectorstores.py';
<CodeBlock language="python" title="langchain_parrot_link/vectorstores.py">
{
VectorstoreSource.replaceAll('__ModuleName__', 'ParrotLink')
.replaceAll('__package_name__', 'langchain-parrot-link')
.replaceAll('__MODULE_NAME__', 'PARROT_LINK')
.replaceAll('__module_name__', 'langchain_parrot_link')
}
</CodeBlock>
</details>
</TabItem>
<TabItem value="embeddings" label="Embeddings">
Embeddings are used to convert `str` objects from `Document.page_content` fields
into a vector representation (represented as a list of floats).
Your embeddings class must inherit from the [Embeddings](https://python.langchain.com/api_reference/core/embeddings/langchain_core.embeddings.embeddings.Embeddings.html#langchain_core.embeddings.embeddings.Embeddings)
base class. This interface has 5 methods that can be implemented.
| Method/Property | Description |
|------------------------ |------------------------------------------------------|
| `__init__` | Initialize the embeddings object. (optional) |
| `embed_query` | Embed a single query text. (required) |
| `embed_documents` | Embed a list of texts. (required) |
| `aembed_query` | Asynchronously embed a single query text. (optional) |
| `aembed_documents` | Asynchronously embed a list of texts. (optional) |
### Constructor
The `__init__` constructor is optional but common; it can be used to set up any necessary attributes
that a user can pass in when initializing the embeddings object. Common attributes include:
- `model` - the id of the model to use for embeddings
### Embedding queries vs documents
The `embed_query` and `embed_documents` methods are required. Both operate on string
inputs; accessing the `Document.page_content` attributes is handled, for legacy reasons,
by the vector store that uses the embedding model.
`embed_query` takes in a single string and returns a single embedding as a list of floats.
If your model has different modes for embedding queries vs the underlying documents, you can
implement this method to handle that.
`embed_documents` takes in a list of strings and returns a list of embeddings as a list of lists of floats.
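Putting the two required methods together, here is a minimal hedged sketch; `ParrotLinkEmbeddings` and its dummy vectors are placeholders for a real provider call:
```python
# Minimal, hypothetical Embeddings implementation that returns dummy vectors.
from typing import List

from langchain_core.embeddings import Embeddings

class ParrotLinkEmbeddings(Embeddings):
    def __init__(self, model: str = "nest-embed-001"):
        self.model = model

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        # A real implementation would call the provider's embedding API here.
        return [[float(len(text)), 1.0, 0.0] for text in texts]

    def embed_query(self, text: str) -> List[float]:
        # Many providers embed queries and documents identically.
        return self.embed_documents([text])[0]
```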
### Implementation
You can start from the following template or langchain-cli command:
```bash
langchain-cli integration new \
--name parrot-link \
--name-class ParrotLink \
--src integration_template/embeddings.py \
--dst langchain_parrot_link/embeddings.py
```
<details>
<summary>Example embeddings code</summary>
import EmbeddingsSource from '/src/theme/integration_template/integration_template/embeddings.py';
<CodeBlock language="python" title="langchain_parrot_link/embeddings.py">
{
EmbeddingsSource.replaceAll('__ModuleName__', 'ParrotLink')
.replaceAll('__package_name__', 'langchain-parrot-link')
.replaceAll('__MODULE_NAME__', 'PARROT_LINK')
.replaceAll('__module_name__', 'langchain_parrot_link')
}
</CodeBlock>
</details>
</TabItem>
<TabItem value="tools" label="Tools">
Tools are used in 2 main ways:
1. To define an "input schema" or "args schema" to pass to a chat model's tool calling
feature along with a text request, such that the chat model can generate a "tool call",
or parameters to call the tool with.
2. To take a "tool call" as generated above, and take some action and return a response
that can be passed back to the chat model as a ToolMessage.
Your tool class must inherit from the [BaseTool](https://python.langchain.com/api_reference/core/tools/langchain_core.tools.base.BaseTool.html#langchain_core.tools.base.BaseTool) base class. This interface has 3 properties and 2 methods that should be implemented in a
subclass.
| Method/Property | Description |
|------------------------ |------------------------------------------------------|
| `name` | Name of the tool (passed to the LLM too). |
| `description` | Description of the tool (passed to the LLM too). |
| `args_schema` | Define the schema for the tool's input arguments. |
| `_run` | Run the tool with the given arguments. |
| `_arun` | Asynchronously run the tool with the given arguments.|
### Properties
`name`, `description`, and `args_schema` are all properties that should be implemented
in the subclass. `name` and `description` are strings that are used to identify the tool
and provide a description of what the tool does. Both of these are passed to the LLM,
and users may override these values depending on the LLM they are using as a form of
"prompt engineering." Giving these a concise and LLM-usable name and description is
important for the initial user experience of the tool.
`args_schema` is a Pydantic `BaseModel` that defines the schema for the tool's input
arguments. This is used to validate the input arguments to the tool, and to provide
a schema for the LLM to fill out when calling the tool. Similar to the `name` and
`description` of the overall Tool class, the fields' names (the variable name) and
description (part of `Field(..., description="description")`) are passed to the LLM,
and the values in these fields should be concise and LLM-usable.
### Run Methods
`_run` is the main method that should be implemented in the subclass. This method
takes in the arguments from `args_schema` and runs the tool, returning a string
response. This method is usually called in a LangGraph [`ToolNode`](https://langchain-ai.github.io/langgraph/how-tos/tool-calling/), and can also be called in a legacy
`langchain.agents.AgentExecutor`.
`_arun` is optional because by default, `_run` will be run in an async executor.
However, if your tool calls any APIs or does any async work, you should implement
this method to run the tool asynchronously in addition to `_run`.
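As a hedged sketch of these pieces working together, `ParrotMultiplyTool` below mirrors the placeholder tool used elsewhere in these guides; the schema fields and behavior are illustrative:
```python
# Minimal, hypothetical BaseTool subclass with an explicit args schema.
from typing import Type

from langchain_core.tools import BaseTool
from pydantic import BaseModel, Field

class MultiplyInput(BaseModel):
    a: int = Field(..., description="First integer to multiply")
    b: int = Field(..., description="Second integer to multiply")

class ParrotMultiplyTool(BaseTool):
    name: str = "parrot_multiply"
    description: str = "Multiply two integers."
    args_schema: Type[BaseModel] = MultiplyInput

    def _run(self, a: int, b: int) -> str:
        # Argument names match the fields declared in args_schema above.
        return str(a * b)
```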
### Implementation
You can start from the following template or langchain-cli command:
```bash
langchain-cli integration new \
--name parrot-link \
--name-class ParrotLink \
--src integration_template/tools.py \
--dst langchain_parrot_link/tools.py
```
<details>
<summary>Example tool code</summary>
import ToolSource from '/src/theme/integration_template/integration_template/tools.py';
<CodeBlock language="python" title="langchain_parrot_link/tools.py">
{
ToolSource.replaceAll('__ModuleName__', 'ParrotLink')
.replaceAll('__package_name__', 'langchain-parrot-link')
.replaceAll('__MODULE_NAME__', 'PARROT_LINK')
.replaceAll('__module_name__', 'langchain_parrot_link')
}
</CodeBlock>
</details>
</TabItem>
<TabItem value="retrievers" label="Retrievers">
Retrievers are used to retrieve documents from APIs, databases, or other sources
based on a query. Your retriever class must inherit from the [BaseRetriever](https://python.langchain.com/api_reference/core/retrievers/langchain_core.retrievers.BaseRetriever.html) base class. This interface has 1 attribute and 2 methods that should be implemented in a subclass.
| Method/Property | Description |
|------------------------ |------------------------------------------------------|
| `k` | Default number of documents to retrieve (configurable). |
| `_get_relevant_documents`| Retrieve documents based on a query. |
| `_aget_relevant_documents` | Asynchronously retrieve documents based on a query. |
### Attributes
`k` is an attribute that should be implemented in the subclass. This attribute
can simply be defined at the top of the class with a default value like
`k: int = 5`. This attribute is the default number of documents to retrieve
from the retriever, and can be overridden by the user when constructing or calling
the retriever.
### Methods
`_get_relevant_documents` is the main method that should be implemented in the subclass.
This method takes in a query and returns a list of `Document` objects, which have 2
main properties:
- `page_content` - the text content of the document
- `metadata` - a dictionary of metadata about the document
Retrievers are typically directly invoked by a user, e.g. as
`MyRetriever(k=4).invoke("query")`, which will automatically call `_get_relevant_documents`
under the hood.
`_aget_relevant_documents` is optional because by default, `_get_relevant_documents` will
be run in an async executor. However, if your retriever calls any APIs or does
any async work, you should implement this method to run the retriever asynchronously
in addition to `_get_relevant_documents` for performance reasons.
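A minimal hedged sketch of a retriever follows; the placeholder search would be replaced by a real API or database query:
```python
# Minimal, hypothetical BaseRetriever subclass.
from typing import List

from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever

class ParrotRetriever(BaseRetriever):
    k: int = 5  # default number of documents to retrieve

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        # A real retriever would query an API or database here.
        return [Document(page_content=f"{query}!") for _ in range(self.k)]
```
With this in place, `ParrotRetriever(k=2).invoke("hello")` returns two documents via `_get_relevant_documents` under the hood.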
### Implementation
You can start from the following template or langchain-cli command:
```bash
langchain-cli integration new \
--name parrot-link \
--name-class ParrotLink \
--src integration_template/retrievers.py \
--dst langchain_parrot_link/retrievers.py
```
<details>
<summary>Example retriever code</summary>
import RetrieverSource from '/src/theme/integration_template/integration_template/retrievers.py';
<CodeBlock language="python" title="langchain_parrot_link/retrievers.py">
{
RetrieverSource.replaceAll('__ModuleName__', 'ParrotLink')
.replaceAll('__package_name__', 'langchain-parrot-link')
.replaceAll('__MODULE_NAME__', 'PARROT_LINK')
.replaceAll('__module_name__', 'langchain_parrot_link')
}
</CodeBlock>
</details>
</TabItem>
</Tabs>
---
## Next Steps
Now that you've implemented your package, you can move on to [adding standard tests](../standard_tests) for your integration and running them successfully.

View File

@@ -0,0 +1,600 @@
{
"cells": [
{
"cell_type": "raw",
"metadata": {
"vscode": {
"languageId": "raw"
}
},
"source": [
"---\n",
"pagination_next: contributing/how_to/integrations/publish\n",
"pagination_prev: contributing/how_to/integrations/package\n",
"---"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# How to add standard tests to an integration\n",
"\n",
"When creating either a custom class for yourself or to publish in a LangChain integration, it is important to add standard tests to ensure it works as expected. This guide will show you how to add standard tests to a custom chat model, and you can **[Skip to the test templates](#standard-test-templates-per-component)** for implementing tests for each integration type.\n",
"\n",
"## Setup\n",
"\n",
"If you're coming from the [previous guide](../package), you have already installed these dependencies, and you can skip this section.\n",
"\n",
"First, let's install 2 dependencies:\n",
"\n",
"- `langchain-core` will define the interfaces we want to import to define our custom tool.\n",
"- `langchain-tests` will provide the standard tests we want to use. Recommended to pin to the latest version: <img src=\"https://img.shields.io/pypi/v/langchain-tests\" style={{position:\"relative\",top:4,left:3}} />\n",
"\n",
":::note\n",
"\n",
"Because added tests in new versions of `langchain-tests` can break your CI/CD pipelines, we recommend pinning the \n",
"version of `langchain-tests` to avoid unexpected changes.\n",
"\n",
":::\n",
"\n",
"import Tabs from '@theme/Tabs';\n",
"import TabItem from '@theme/TabItem';\n",
"\n",
"<Tabs>\n",
" <TabItem value=\"poetry\" label=\"Poetry\" default>\n",
"If you followed the [previous guide](../package), you should already have these dependencies installed!\n",
"\n",
"```bash\n",
"poetry add langchain-core\n",
"poetry add --group test pytest pytest-socket pytest-asyncio langchain-tests==<latest_version>\n",
"poetry install --with test\n",
"```\n",
" </TabItem>\n",
" <TabItem value=\"pip\" label=\"Pip\">\n",
"```bash\n",
"pip install -U langchain-core pytest pytest-socket pytest-asyncio langchain-tests\n",
"\n",
"# install current package in editable mode\n",
"pip install --editable .\n",
"```\n",
" </TabItem>\n",
"</Tabs>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's say we're publishing a package, `langchain_parrot_link`, that exposes the chat model from the [guide on implementing the package](../package). We can add the standard tests to the package by following the steps below."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And we'll assume you've structured your package the same way as the main LangChain\n",
"packages:\n",
"\n",
"```plaintext\n",
"langchain-parrot-link/\n",
"├── langchain_parrot_link/\n",
"│ ├── __init__.py\n",
"│ └── chat_models.py\n",
"├── tests/\n",
"│ ├── __init__.py\n",
"│ └── test_chat_models.py\n",
"├── pyproject.toml\n",
"└── README.md\n",
"```\n",
"\n",
"## Add and configure standard tests\n",
"\n",
"There are 2 namespaces in the `langchain-tests` package: \n",
"\n",
"- [unit tests](../../../concepts/testing.mdx#unit-tests) (`langchain_tests.unit_tests`): designed to be used to test the component in isolation and without access to external services\n",
"- [integration tests](../../../concepts/testing.mdx#unit-tests) (`langchain_tests.integration_tests`): designed to be used to test the component with access to external services (in particular, the external service that the component is designed to interact with).\n",
"\n",
"Both types of tests are implemented as [`pytest` class-based test suites](https://docs.pytest.org/en/7.1.x/getting-started.html#group-multiple-tests-in-a-class).\n",
"\n",
"By subclassing the base classes for each type of standard test (see below), you get all of the standard tests for that type, and you\n",
"can override the properties that the test suite uses to configure the tests.\n",
"\n",
"### Standard chat model tests\n",
"\n",
"Here's how you would configure the standard unit tests for the custom chat model:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# title=\"tests/unit_tests/test_chat_models.py\"\n",
"from typing import Tuple, Type\n",
"\n",
"from langchain_parrot_link.chat_models import ChatParrotLink\n",
"from langchain_tests.unit_tests import ChatModelUnitTests\n",
"\n",
"\n",
"class TestChatParrotLinkUnit(ChatModelUnitTests):\n",
" @property\n",
" def chat_model_class(self) -> Type[ChatParrotLink]:\n",
" return ChatParrotLink\n",
"\n",
" @property\n",
" def chat_model_params(self) -> dict:\n",
" return {\n",
" \"model\": \"bird-brain-001\",\n",
" \"temperature\": 0,\n",
" \"parrot_buffer_length\": 50,\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# title=\"tests/integration_tests/test_chat_models.py\"\n",
"from typing import Type\n",
"\n",
"from langchain_parrot_link.chat_models import ChatParrotLink\n",
"from langchain_tests.integration_tests import ChatModelIntegrationTests\n",
"\n",
"\n",
"class TestChatParrotLinkIntegration(ChatModelIntegrationTests):\n",
" @property\n",
" def chat_model_class(self) -> Type[ChatParrotLink]:\n",
" return ChatParrotLink\n",
"\n",
" @property\n",
" def chat_model_params(self) -> dict:\n",
" return {\n",
" \"model\": \"bird-brain-001\",\n",
" \"temperature\": 0,\n",
" \"parrot_buffer_length\": 50,\n",
" }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"and you would run these with the following commands from your project root\n",
"\n",
"<Tabs>\n",
" <TabItem value=\"poetry\" label=\"Poetry\" default>\n",
"\n",
"```bash\n",
"# run unit tests without network access\n",
"poetry run pytest --disable-socket --allow-unix-socket --asyncio-mode=auto tests/unit_tests\n",
"\n",
"# run integration tests\n",
"poetry run pytest --asyncio-mode=auto tests/integration_tests\n",
"```\n",
"\n",
" </TabItem>\n",
" <TabItem value=\"pip\" label=\"Pip\">\n",
"\n",
"```bash\n",
"# run unit tests without network access\n",
"pytest --disable-socket --allow-unix-socket --asyncio-mode=auto tests/unit_tests\n",
"\n",
"# run integration tests\n",
"pytest --asyncio-mode=auto tests/integration_tests\n",
"```\n",
"\n",
" </TabItem>\n",
"</Tabs>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test suite information and troubleshooting\n",
"\n",
"For a full list of the standard test suites that are available, as well as\n",
"information on which tests are included and how to troubleshoot common issues,\n",
"see the [Standard Tests API Reference](https://python.langchain.com/api_reference/standard_tests/index.html).\n",
"\n",
"An increasing number of troubleshooting guides are being added to this documentation,\n",
"and if you're interested in contributing, feel free to add docstrings to tests in \n",
"[Github](https://github.com/langchain-ai/langchain/tree/master/libs/standard-tests/langchain_tests)!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Standard test templates per component:\n",
"\n",
"Above, we implement the **unit** and **integration** standard tests for a tool. Below are the templates for implementing the standard tests for each component:\n",
"\n",
"<details>\n",
" <summary>Chat Models</summary>\n",
" <p>Note: The standard tests for chat models are implemented in the example in the main body of this guide too.</p>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Chat model standard tests test a range of behaviors, from the most basic requirements (generating a response to a query) to optional capabilities like multi-modal support and tool-calling. For a test run to be successful:\n",
"\n",
"1. If a feature is intended to be supported by the model, it should pass;\n",
"2. If a feature is not intended to be supported by the model, it should be skipped.\n",
"\n",
"Tests for \"optional\" capabilities are controlled via a set of properties that can be overridden on the test model subclass.\n",
"\n",
"You can see the entire list of properties in the API reference [here](https://python.langchain.com/api_reference/standard_tests/unit_tests/langchain_tests.unit_tests.chat_models.ChatModelTests.html). These properties are shared by both unit and integration tests.\n",
"\n",
"For example, to enable integration tests for image inputs, we can implement\n",
"\n",
"```python\n",
"@property\n",
"def supports_image_inputs(self) -> bool:\n",
" return True\n",
"```\n",
"\n",
"on the integration test class.\n",
"\n",
":::note\n",
"\n",
"Details on what tests are run, how each test can be skipped, and troubleshooting tips for each test can be found in the API references. See details:\n",
"\n",
"- [Unit tests API reference](https://python.langchain.com/api_reference/standard_tests/unit_tests/langchain_tests.unit_tests.chat_models.ChatModelUnitTests.html)\n",
"- [Integration tests API reference](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.html)\n",
"\n",
":::\n",
"\n",
"Unit test example:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# title=\"tests/unit_tests/test_chat_models.py\"\n",
"from typing import Type\n",
"\n",
"from langchain_parrot_link.chat_models import ChatParrotLink\n",
"from langchain_tests.unit_tests import ChatModelUnitTests\n",
"\n",
"\n",
"class TestChatParrotLinkUnit(ChatModelUnitTests):\n",
" @property\n",
" def chat_model_class(self) -> Type[ChatParrotLink]:\n",
" return ChatParrotLink\n",
"\n",
" @property\n",
" def chat_model_params(self) -> dict:\n",
" return {\n",
" \"model\": \"bird-brain-001\",\n",
" \"temperature\": 0,\n",
" \"parrot_buffer_length\": 50,\n",
" }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Integration test example:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# title=\"tests/integration_tests/test_chat_models.py\"\n",
"from typing import Type\n",
"\n",
"from langchain_parrot_link.chat_models import ChatParrotLink\n",
"from langchain_tests.integration_tests import ChatModelIntegrationTests\n",
"\n",
"\n",
"class TestChatParrotLinkIntegration(ChatModelIntegrationTests):\n",
" @property\n",
" def chat_model_class(self) -> Type[ChatParrotLink]:\n",
" return ChatParrotLink\n",
"\n",
" @property\n",
" def chat_model_params(self) -> dict:\n",
" return {\n",
" \"model\": \"bird-brain-001\",\n",
" \"temperature\": 0,\n",
" \"parrot_buffer_length\": 50,\n",
" }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"</details>\n",
"<details>\n",
" <summary>Embedding Models</summary>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# title=\"tests/unit_tests/test_embeddings.py\"\n",
"from typing import Tuple, Type\n",
"\n",
"from langchain_parrot_link.embeddings import ParrotLinkEmbeddings\n",
"from langchain_tests.unit_tests import EmbeddingsUnitTests\n",
"\n",
"\n",
"class TestParrotLinkEmbeddingsUnit(EmbeddingsUnitTests):\n",
" @property\n",
" def embeddings_class(self) -> Type[ParrotLinkEmbeddings]:\n",
" return ParrotLinkEmbeddings\n",
"\n",
" @property\n",
" def embedding_model_params(self) -> dict:\n",
" return {\"model\": \"nest-embed-001\", \"temperature\": 0}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# title=\"tests/integration_tests/test_embeddings.py\"\n",
"from typing import Type\n",
"\n",
"from langchain_parrot_link.embeddings import ParrotLinkEmbeddings\n",
"from langchain_tests.integration_tests import EmbeddingsIntegrationTests\n",
"\n",
"\n",
"class TestParrotLinkEmbeddingsIntegration(EmbeddingsIntegrationTests):\n",
" @property\n",
" def embeddings_class(self) -> Type[ParrotLinkEmbeddings]:\n",
" return ParrotLinkEmbeddings\n",
"\n",
" @property\n",
" def embedding_model_params(self) -> dict:\n",
" return {\"model\": \"nest-embed-001\"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"</details>\n",
"<details>\n",
" <summary>Tools/Toolkits</summary>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# title=\"tests/unit_tests/test_tools.py\"\n",
"from typing import Type\n",
"\n",
"from langchain_parrot_link.tools import ParrotMultiplyTool\n",
"from langchain_tests.unit_tests import ToolsUnitTests\n",
"\n",
"\n",
"class TestParrotMultiplyToolUnit(ToolsUnitTests):\n",
" @property\n",
" def tool_constructor(self) -> Type[ParrotMultiplyTool]:\n",
" return ParrotMultiplyTool\n",
"\n",
" @property\n",
" def tool_constructor_params(self) -> dict:\n",
" # if your tool constructor instead required initialization arguments like\n",
" # `def __init__(self, some_arg: int):`, you would return those here\n",
" # as a dictionary, e.g.: `return {'some_arg': 42}`\n",
" return {}\n",
"\n",
" @property\n",
" def tool_invoke_params_example(self) -> dict:\n",
" \"\"\"\n",
" Returns a dictionary representing the \"args\" of an example tool call.\n",
"\n",
" This should NOT be a ToolCall dict - i.e. it should not\n",
" have {\"name\", \"id\", \"args\"} keys.\n",
" \"\"\"\n",
" return {\"a\": 2, \"b\": 3}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# title=\"tests/integration_tests/test_tools.py\"\n",
"from typing import Type\n",
"\n",
"from langchain_parrot_link.tools import ParrotMultiplyTool\n",
"from langchain_tests.integration_tests import ToolsIntegrationTests\n",
"\n",
"\n",
"class TestParrotMultiplyToolIntegration(ToolsIntegrationTests):\n",
" @property\n",
" def tool_constructor(self) -> Type[ParrotMultiplyTool]:\n",
" return ParrotMultiplyTool\n",
"\n",
" @property\n",
" def tool_constructor_params(self) -> dict:\n",
" # if your tool constructor instead required initialization arguments like\n",
" # `def __init__(self, some_arg: int):`, you would return those here\n",
" # as a dictionary, e.g.: `return {'some_arg': 42}`\n",
" return {}\n",
"\n",
" @property\n",
" def tool_invoke_params_example(self) -> dict:\n",
" \"\"\"\n",
" Returns a dictionary representing the \"args\" of an example tool call.\n",
"\n",
" This should NOT be a ToolCall dict - i.e. it should not\n",
" have {\"name\", \"id\", \"args\"} keys.\n",
" \"\"\"\n",
" return {\"a\": 2, \"b\": 3}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"</details>\n",
"<details>\n",
" <summary>Vector Stores</summary>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here's how you would configure the standard tests for a typical vector store (using\n",
"`ParrotVectorStore` as a placeholder):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# title=\"tests/integration_tests/test_vectorstores_sync.py\"\n",
"\n",
"from typing import AsyncGenerator, Generator\n",
"\n",
"import pytest\n",
"from langchain_core.vectorstores import VectorStore\n",
"from langchain_parrot_link.vectorstores import ParrotVectorStore\n",
"from langchain_standard_tests.integration_tests.vectorstores import (\n",
" AsyncReadWriteTestSuite,\n",
" ReadWriteTestSuite,\n",
")\n",
"\n",
"\n",
"class TestSync(ReadWriteTestSuite):\n",
" @pytest.fixture()\n",
" def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore\n",
" \"\"\"Get an empty vectorstore for unit tests.\"\"\"\n",
" store = ParrotVectorStore()\n",
" # note: store should be EMPTY at this point\n",
" # if you need to delete data, you may do so here\n",
" try:\n",
" yield store\n",
" finally:\n",
" # cleanup operations, or deleting data\n",
" pass\n",
"\n",
"\n",
"class TestAsync(AsyncReadWriteTestSuite):\n",
" @pytest.fixture()\n",
" async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore\n",
" \"\"\"Get an empty vectorstore for unit tests.\"\"\"\n",
" store = ParrotVectorStore()\n",
" # note: store should be EMPTY at this point\n",
" # if you need to delete data, you may do so here\n",
" try:\n",
" yield store\n",
" finally:\n",
" # cleanup operations, or deleting data\n",
" pass"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"There are separate suites for testing synchronous and asynchronous methods.\n",
"Configuring the tests consists of implementing pytest fixtures for setting up an\n",
"empty vector store and tearing down the vector store after the test run ends.\n",
"\n",
"For example, below is the `ReadWriteTestSuite` for the [Chroma](https://python.langchain.com/docs/integrations/vectorstores/chroma/)\n",
"integration:\n",
"\n",
"```python\n",
"from typing import Generator\n",
"\n",
"import pytest\n",
"from langchain_core.vectorstores import VectorStore\n",
"from langchain_tests.integration_tests.vectorstores import ReadWriteTestSuite\n",
"\n",
"from langchain_chroma import Chroma\n",
"\n",
"\n",
"class TestSync(ReadWriteTestSuite):\n",
" @pytest.fixture()\n",
" def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore\n",
" \"\"\"Get an empty vectorstore.\"\"\"\n",
" store = Chroma(embedding_function=self.get_embeddings())\n",
" try:\n",
" yield store\n",
" finally:\n",
" store.delete_collection()\n",
" pass\n",
"```\n",
"\n",
"Note that before the initial `yield`, we instantiate the vector store with an\n",
"[embeddings](/docs/concepts/embedding_models/) object. This is a pre-defined\n",
"[\"fake\" embeddings model](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.vectorstores.ReadWriteTestSuite.html#langchain_tests.integration_tests.vectorstores.ReadWriteTestSuite.get_embeddings)\n",
"that will generate short, arbitrary vectors for documents. You can use a different\n",
"embeddings object if desired.\n",
"\n",
"In the `finally` block, we call whatever integration-specific logic is needed to\n",
"bring the vector store to a clean state. This logic is executed in between each test\n",
"(e.g., even if tests fail).\n",
"\n",
":::note\n",
"\n",
"Details on what tests are run, how each test can be skipped, and troubleshooting tips for each test can be found in the API references. See details:\n",
"\n",
"- [Sync tests API reference](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.vectorstores.ReadWriteTestSuite.html)\n",
"- [Async tests API reference](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.vectorstores.AsyncReadWriteTestSuite.html)\n",
"\n",
":::"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"</details>"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,393 +0,0 @@
---
pagination_next: contributing/how_to/integrations/publish
pagination_prev: contributing/how_to/integrations/package
---
# How to add standard tests to an integration
When creating a custom class, either for yourself or to publish in a LangChain integration, it is important to add standard tests to ensure it works as expected. This guide will show you how to add standard tests to each integration type.
## Setup
First, let's install 2 dependencies:
- `langchain-core` will define the interfaces we want to import to define our custom tool.
- `langchain-tests` will provide the standard tests we want to use, as well as pytest plugins necessary to run them. We recommend pinning to the latest version: <img src="https://img.shields.io/pypi/v/langchain-tests" style={{position:"relative",top:4,left:3}} />
:::note
Because added tests in new versions of `langchain-tests` can break your CI/CD pipelines, we recommend pinning the
version of `langchain-tests` to avoid unexpected changes.
:::
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
<Tabs>
<TabItem value="poetry" label="Poetry" default>
If you followed the [previous guide](../package), you should already have these dependencies installed!
```bash
poetry add langchain-core
poetry add --group test langchain-tests==<latest_version>
poetry install --with test
```
</TabItem>
<TabItem value="pip" label="Pip">
```bash
pip install -U langchain-core langchain-tests
# install current package in editable mode
pip install --editable .
```
</TabItem>
</Tabs>
## Add and configure standard tests
There are 2 namespaces in the `langchain-tests` package:
- [unit tests](../../../concepts/testing.mdx#unit-tests) (`langchain_tests.unit_tests`): designed to be used to test the component in isolation and without access to external services
- [integration tests](../../../concepts/testing.mdx#integration-tests) (`langchain_tests.integration_tests`): designed to be used to test the component with access to external services (in particular, the external service that the component is designed to interact with).
Both types of tests are implemented as [`pytest` class-based test suites](https://docs.pytest.org/en/7.1.x/getting-started.html#group-multiple-tests-in-a-class).
By subclassing the base classes for each type of standard test (see below), you get all of the standard tests for that type, and you
can override the properties that the test suite uses to configure the tests.
In order to run the tests in the same way as this guide, we recommend subclassing these
classes in test files under two test subdirectories:
- `tests/unit_tests` for unit tests
- `tests/integration_tests` for integration tests
### Implementing standard tests
import CodeBlock from '@theme/CodeBlock';
In the following tabs, we show how to implement the standard tests for
each component type:
<Tabs>
<TabItem value="chat_models" label="Chat models">
To configure standard tests for a chat model, we subclass `ChatModelUnitTests` and `ChatModelIntegrationTests`. On each subclass, we override the following `@property` methods to specify the chat model to be tested and the chat model's configuration:
| Property | Description |
| --- | --- |
| `chat_model_class` | The class for the chat model to be tested |
| `chat_model_params` | The parameters to pass to the chat model's constructor |
Additionally, chat model standard tests test a range of behaviors, from the most basic requirements (generating a response to a query) to optional capabilities like multi-modal support and tool-calling. For a test run to be successful:
1. If a feature is intended to be supported by the model, it should pass;
2. If a feature is not intended to be supported by the model, it should be skipped.
Tests for "optional" capabilities are controlled via a set of properties that can be overridden on the test model subclass.
You can see the **entire list of configurable capabilities** in the API references for
[unit tests](https://python.langchain.com/api_reference/standard_tests/unit_tests/langchain_tests.unit_tests.chat_models.ChatModelUnitTests.html)
and [integration tests](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.html).
For example, to enable integration tests for image inputs, we can implement
```python
@property
def supports_image_inputs(self) -> bool:
return True
```
on the integration test class.
:::note
Details on what tests are run, how each test can be skipped, and troubleshooting tips for each test can be found in the API references. See details:
- [Unit tests API reference](https://python.langchain.com/api_reference/standard_tests/unit_tests/langchain_tests.unit_tests.chat_models.ChatModelUnitTests.html)
- [Integration tests API reference](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.html)
:::
Unit test example:
import ChatUnitSource from '../../../../src/theme/integration_template/tests/unit_tests/test_chat_models.py';
<CodeBlock language="python" title="tests/unit_tests/test_chat_models.py">
{
ChatUnitSource.replaceAll('__ModuleName__', 'ParrotLink')
.replaceAll('__package_name__', 'langchain-parrot-link')
.replaceAll('__MODULE_NAME__', 'PARROT_LINK')
.replaceAll('__module_name__', 'langchain_parrot_link')
}
</CodeBlock>
Integration test example:
import ChatIntegrationSource from '../../../../src/theme/integration_template/tests/integration_tests/test_chat_models.py';
<CodeBlock language="python" title="tests/integration_tests/test_chat_models.py">
{
ChatIntegrationSource.replaceAll('__ModuleName__', 'ParrotLink')
.replaceAll('__package_name__', 'langchain-parrot-link')
.replaceAll('__MODULE_NAME__', 'PARROT_LINK')
.replaceAll('__module_name__', 'langchain_parrot_link')
}
</CodeBlock>
</TabItem>
<TabItem value="vector_stores" label="Vector stores">
Here's how you would configure the standard tests for a typical vector store (using
`ParrotVectorStore` as a placeholder). Vector store tests do not have optional
capabilities to be configured at this time:
import VectorStoreIntegrationSource from '../../../../src/theme/integration_template/tests/integration_tests/test_vectorstores.py';
<CodeBlock language="python" title="tests/integration_tests/test_vectorstores.py">
{
VectorStoreIntegrationSource.replaceAll('__ModuleName__', 'Parrot')
.replaceAll('__package_name__', 'langchain-parrot-link')
.replaceAll('__MODULE_NAME__', 'PARROT')
.replaceAll('__module_name__', 'langchain_parrot_link')
}
</CodeBlock>
Configuring the tests consists of implementing pytest fixtures for setting up an
empty vector store and tearing down the vector store after the test run ends.
| Fixture | Description |
| --- | --- |
| `vectorstore` | A generator that yields an empty vector store for unit tests. The vector store is cleaned up after the test run ends. |
For example, below is the `VectorStoreIntegrationTests` subclass for the [Chroma](https://python.langchain.com/docs/integrations/vectorstores/chroma/)
integration:
```python
from typing import Generator
import pytest
from langchain_core.vectorstores import VectorStore
from langchain_tests.integration_tests.vectorstores import VectorStoreIntegrationTests
from langchain_chroma import Chroma
class TestChromaStandard(VectorStoreIntegrationTests):
@pytest.fixture()
def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore
"""Get an empty vectorstore for unit tests."""
store = Chroma(embedding_function=self.get_embeddings())
try:
yield store
finally:
store.delete_collection()
```
Note that before the initial `yield`, we instantiate the vector store with an
[embeddings](/docs/concepts/embedding_models/) object. This is a pre-defined
["fake" embeddings model](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.vectorstores.VectorStoreIntegrationTests.html#langchain_tests.integration_tests.vectorstores.VectorStoreIntegrationTests.get_embeddings)
that will generate short, arbitrary vectors for documents. You can use a different
embeddings object if desired.
In the `finally` block, we call whatever integration-specific logic is needed to
bring the vector store to a clean state. This logic is executed between each test,
even when a test fails.
:::note
Details on what tests are run and troubleshooting tips for each test can be found in the [API reference](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.vectorstores.VectorStoreIntegrationTests.html).
:::
</TabItem>
<TabItem value="embeddings" label="Embeddings">
To configure standard tests for an embeddings model, we subclass `EmbeddingsUnitTests` and `EmbeddingsIntegrationTests`. On each subclass, we override the following `@property` methods to specify the embeddings model to be tested and the embeddings model's configuration:
| Property | Description |
| --- | --- |
| `embeddings_class` | The class for the embeddings model to be tested |
| `embedding_model_params` | The parameters to pass to the embeddings model's constructor |
:::note
Details on what tests are run, how each test can be skipped, and troubleshooting tips for each test can be found in the API references. See details:
- [Unit tests API reference](https://python.langchain.com/api_reference/standard_tests/unit_tests/langchain_tests.unit_tests.embeddings.EmbeddingsUnitTests.html)
- [Integration tests API reference](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.embeddings.EmbeddingsIntegrationTests.html)
:::
Unit test example:
import EmbeddingsUnitSource from '../../../../src/theme/integration_template/tests/unit_tests/test_embeddings.py';
<CodeBlock language="python" title="tests/unit_tests/test_embeddings.py">
{
EmbeddingsUnitSource.replaceAll('__ModuleName__', 'ParrotLink')
.replaceAll('__package_name__', 'langchain-parrot-link')
.replaceAll('__MODULE_NAME__', 'PARROT_LINK')
.replaceAll('__module_name__', 'langchain_parrot_link')
}
</CodeBlock>
Integration test example:
import EmbeddingsIntegrationSource from '../../../../src/theme/integration_template/tests/integration_tests/test_embeddings.py';
<CodeBlock language="python" title="tests/integration_tests/test_embeddings.py">
{
EmbeddingsIntegrationSource.replaceAll('__ModuleName__', 'ParrotLink')
.replaceAll('__package_name__', 'langchain-parrot-link')
.replaceAll('__MODULE_NAME__', 'PARROT_LINK')
.replaceAll('__module_name__', 'langchain_parrot_link')
}
</CodeBlock>
</TabItem>
<TabItem value="tools" label="Tools">
To configure standard tests for a tool, we subclass `ToolsUnitTests` and
`ToolsIntegrationTests`. On each subclass, we override the following `@property` methods
to specify the tool to be tested and the tool's configuration:
| Property | Description |
| --- | --- |
| `tool_constructor` | The constructor for the tool to be tested, or an instantiated tool. |
| `tool_constructor_params` | The parameters to pass to the tool (optional). |
| `tool_invoke_params_example` | An example of the parameters to pass to the tool's `invoke` method. |
If you are testing a tool class, pass the class (e.g., `MyTool`) to `tool_constructor` and pass its constructor parameters in `tool_constructor_params`.
If you are testing an instantiated tool, pass the instance to `tool_constructor` and do not
override `tool_constructor_params`.
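For example, here is a minimal sketch for a hypothetical `ParrotMultiplyTool` that multiplies two integers (the tool and module names are illustrative placeholders):
```python
from typing import Type

from langchain_parrot_link.tools import ParrotMultiplyTool
from langchain_tests.unit_tests import ToolsUnitTests


class TestParrotMultiplyToolUnit(ToolsUnitTests):
    @property
    def tool_constructor(self) -> Type[ParrotMultiplyTool]:
        # The tool class under test (an instantiated tool would also work here)
        return ParrotMultiplyTool

    @property
    def tool_constructor_params(self) -> dict:
        # Constructor kwargs; return {} if the tool takes none
        return {}

    @property
    def tool_invoke_params_example(self) -> dict:
        # Example kwargs for `invoke`; keys must match the tool's args schema
        return {"a": 2, "b": 3}
```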
:::note
Details on what tests are run, how each test can be skipped, and troubleshooting tips for each test can be found in the API references. See details:
- [Unit tests API reference](https://python.langchain.com/api_reference/standard_tests/unit_tests/langchain_tests.unit_tests.tools.ToolsUnitTests.html)
- [Integration tests API reference](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.tools.ToolsIntegrationTests.html)
:::
import ToolsUnitSource from '../../../../src/theme/integration_template/tests/unit_tests/test_tools.py';
<CodeBlock language="python" title="tests/unit_tests/test_tools.py">
{
ToolsUnitSource.replaceAll('__ModuleName__', 'Parrot')
.replaceAll('__package_name__', 'langchain-parrot-link')
.replaceAll('__MODULE_NAME__', 'PARROT')
.replaceAll('__module_name__', 'langchain_parrot_link')
}
</CodeBlock>
import ToolsIntegrationSource from '../../../../src/theme/integration_template/tests/integration_tests/test_tools.py';
<CodeBlock language="python" title="tests/integration_tests/test_tools.py">
{
ToolsIntegrationSource.replaceAll('__ModuleName__', 'Parrot')
.replaceAll('__package_name__', 'langchain-parrot-link')
.replaceAll('__MODULE_NAME__', 'PARROT')
.replaceAll('__module_name__', 'langchain_parrot_link')
}
</CodeBlock>
</TabItem>
<TabItem value="retrievers" label="Retrievers">
To configure standard tests for a retriever, we subclass `RetrieversIntegrationTests`.
On the subclass, we override the following `@property` methods to specify the retriever to be tested and its configuration:
| Property | Description |
| --- | --- |
| `retriever_constructor` | The class for the retriever to be tested |
| `retriever_constructor_params` | The parameters to pass to the retriever's constructor |
| `retriever_query_example` | An example of the query to pass to the retriever's `invoke` method |
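For example, here is a minimal sketch for a hypothetical `ParrotRetriever` (the retriever and module names, and the `k` parameter, are illustrative placeholders):
```python
from typing import Type

from langchain_parrot_link.retrievers import ParrotRetriever
from langchain_tests.integration_tests import RetrieversIntegrationTests


class TestParrotRetriever(RetrieversIntegrationTests):
    @property
    def retriever_constructor(self) -> Type[ParrotRetriever]:
        # The retriever class under test
        return ParrotRetriever

    @property
    def retriever_constructor_params(self) -> dict:
        # Constructor kwargs, e.g. how many documents to return
        return {"k": 2}

    @property
    def retriever_query_example(self) -> str:
        # An example query to pass to the retriever's `invoke` method
        return "example query"
```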
:::note
Details on what tests are run and troubleshooting tips for each test can be found in the [API reference](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.retrievers.RetrieversIntegrationTests.html).
:::
import RetrieverIntegrationSource from '../../../../src/theme/integration_template/tests/integration_tests/test_retrievers.py';
<CodeBlock language="python" title="tests/integration_tests/test_retrievers.py">
{
RetrieverIntegrationSource.replaceAll('__ModuleName__', 'Parrot')
.replaceAll('__package_name__', 'langchain-parrot-link')
.replaceAll('__MODULE_NAME__', 'PARROT')
.replaceAll('__module_name__', 'langchain_parrot_link')
}
</CodeBlock>
</TabItem>
</Tabs>
---
### Running the tests
You can run these with the following commands from your project root:
<Tabs>
<TabItem value="poetry" label="Poetry" default>
```bash
# run unit tests without network access
poetry run pytest --disable-socket --allow-unix-socket --asyncio-mode=auto tests/unit_tests
# run integration tests
poetry run pytest --asyncio-mode=auto tests/integration_tests
```
</TabItem>
<TabItem value="pip" label="Pip">
```bash
# run unit tests without network access
pytest --disable-socket --allow-unix-socket --asyncio-mode=auto tests/unit_tests
# run integration tests
pytest --asyncio-mode=auto tests/integration_tests
```
</TabItem>
</Tabs>
## Test suite information and troubleshooting
For a full list of the standard test suites that are available, as well as
information on which tests are included and how to troubleshoot common issues,
see the [Standard Tests API Reference](https://python.langchain.com/api_reference/standard_tests/index.html).
You can see troubleshooting guides under the individual test suites listed in that API Reference. For example,
[here is the guide for `ChatModelIntegrationTests.test_usage_metadata`](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.html#langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_usage_metadata).

View File

@@ -802,7 +802,7 @@
"That's a wrap! In this quick start we covered how to create a simple agent. Agents are a complex topic, and there's lot to learn! \n",
"\n",
":::important\n",
"This section covered building with LangChain Agents. They are fine for getting started, but past a certain point you will likely want flexibility and control which they do not offer. To develop more advanced agents, we recommend checking out [LangGraph](/docs/concepts/architecture/#langgraph)\n",
"This section covered building with LangChain Agents. LangChain Agents are fine for getting started, but past a certain point you will likely want flexibility and control that they do not offer. For working with more advanced agents, we'd reccommend checking out [LangGraph](/docs/concepts/architecture/#langgraph)\n",
":::\n",
"\n",
"If you want to continue using LangChain agents, some good advanced guides are:\n",

View File

@@ -294,7 +294,7 @@
"metadata": {},
"source": [
":::caution\n",
"By default, `@tool(parse_docstring=True)` will raise `ValueError` if the docstring does not parse correctly. See [API Reference](https://python.langchain.com/api_reference/core/tools/langchain_core.tools.convert.tool.html) for detail and examples.\n",
"By default, `@tool(parse_docstring=True)` will raise `ValueError` if the docstring does not parse correctly. See [API Reference](https://python.langchain.com/api_reference/core/tools/langchain_core.tools.tool.html) for detail and examples.\n",
":::"
]
},

View File

@@ -0,0 +1,459 @@
{
"cells": [
{
"cell_type": "raw",
"id": "5e61b0f2-15b9-4241-9ab5-ff0f3f732232",
"metadata": {},
"source": [
"---\n",
"sidebar_position: 1\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "846ef4f4-ee38-4a42-a7d3-1a23826e4830",
"metadata": {},
"source": [
"# How to map values to a graph database\n",
"\n",
"In this guide we'll go over strategies to improve graph database query generation by mapping values from user inputs to database.\n",
"When using the built-in graph chains, the LLM is aware of the graph schema, but has no information about the values of properties stored in the database.\n",
"Therefore, we can introduce a new step in graph database QA system to accurately map values.\n",
"\n",
"## Setup\n",
"\n",
"First, get required packages and set environment variables:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "18294435-182d-48da-bcab-5b8945b6d9cf",
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain langchain-neo4j langchain-openai neo4j"
]
},
{
"cell_type": "markdown",
"id": "d86dd771-4001-4a34-8680-22e9b50e1e88",
"metadata": {},
"source": [
"We default to OpenAI models in this guide, but you can swap them out for the model provider of your choice."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9346f8e9-78bf-4667-b3d3-72807a73b718",
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
" ········\n"
]
}
],
"source": [
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()\n",
"\n",
"# Uncomment the below to use LangSmith. Not required.\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\""
]
},
{
"cell_type": "markdown",
"id": "271c8a23-e51c-4ead-a76e-cf21107db47e",
"metadata": {},
"source": [
"Next, we need to define Neo4j credentials.\n",
"Follow [these installation steps](https://neo4j.com/docs/operations-manual/current/installation/) to set up a Neo4j database."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a2a3bb65-05c7-4daf-bac2-b25ae7fe2751",
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"NEO4J_URI\"] = \"bolt://localhost:7687\"\n",
"os.environ[\"NEO4J_USERNAME\"] = \"neo4j\"\n",
"os.environ[\"NEO4J_PASSWORD\"] = \"password\""
]
},
{
"cell_type": "markdown",
"id": "50fa4510-29b7-49b6-8496-5e86f694e81f",
"metadata": {},
"source": [
"The below example will create a connection with a Neo4j database and will populate it with example data about movies and their actors."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "4ee9ef7a-eef9-4289-b9fd-8fbc31041688",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_neo4j import Neo4jGraph\n",
"\n",
"graph = Neo4jGraph()\n",
"\n",
"# Import movie information\n",
"\n",
"movies_query = \"\"\"\n",
"LOAD CSV WITH HEADERS FROM \n",
"'https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/movies/movies_small.csv'\n",
"AS row\n",
"MERGE (m:Movie {id:row.movieId})\n",
"SET m.released = date(row.released),\n",
" m.title = row.title,\n",
" m.imdbRating = toFloat(row.imdbRating)\n",
"FOREACH (director in split(row.director, '|') | \n",
" MERGE (p:Person {name:trim(director)})\n",
" MERGE (p)-[:DIRECTED]->(m))\n",
"FOREACH (actor in split(row.actors, '|') | \n",
" MERGE (p:Person {name:trim(actor)})\n",
" MERGE (p)-[:ACTED_IN]->(m))\n",
"FOREACH (genre in split(row.genres, '|') | \n",
" MERGE (g:Genre {name:trim(genre)})\n",
" MERGE (m)-[:IN_GENRE]->(g))\n",
"\"\"\"\n",
"\n",
"graph.query(movies_query)"
]
},
{
"cell_type": "markdown",
"id": "0cb0ea30-ca55-4f35-aad6-beb57453de66",
"metadata": {},
"source": [
"## Detecting entities in the user input\n",
"We have to extract the types of entities/values we want to map to a graph database. In this example, we are dealing with a movie graph, so we can map movies and people to the database."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "e1a19424-6046-40c2-81d1-f3b88193a293",
"metadata": {},
"outputs": [],
"source": [
"from typing import List, Optional\n",
"\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_openai import ChatOpenAI\n",
"from pydantic import BaseModel, Field\n",
"\n",
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
"\n",
"\n",
"class Entities(BaseModel):\n",
" \"\"\"Identifying information about entities.\"\"\"\n",
"\n",
" names: List[str] = Field(\n",
" ...,\n",
" description=\"All the person or movies appearing in the text\",\n",
" )\n",
"\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\n",
" \"system\",\n",
" \"You are extracting person and movies from the text.\",\n",
" ),\n",
" (\n",
" \"human\",\n",
" \"Use the given format to extract information from the following \"\n",
" \"input: {question}\",\n",
" ),\n",
" ]\n",
")\n",
"\n",
"\n",
"entity_chain = prompt | llm.with_structured_output(Entities)"
]
},
{
"cell_type": "markdown",
"id": "9c14084c-37a7-4a9c-a026-74e12961c781",
"metadata": {},
"source": [
"We can test the entity extraction chain."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "bbfe0d8f-982e-46e6-88fb-8a4f0d850b07",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Entities(names=['Casino'])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"entities = entity_chain.invoke({\"question\": \"Who played in Casino movie?\"})\n",
"entities"
]
},
{
"cell_type": "markdown",
"id": "a8afbf13-05d0-4383-8050-f88b8c2f6fab",
"metadata": {},
"source": [
"We will utilize a simple `CONTAINS` clause to match entities to database. In practice, you might want to use a fuzzy search or a fulltext index to allow for minor misspellings."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "6f92929f-74fb-4db2-b7e1-eb1e9d386a67",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Casino maps to Casino Movie in database\\n'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"match_query = \"\"\"MATCH (p:Person|Movie)\n",
"WHERE p.name CONTAINS $value OR p.title CONTAINS $value\n",
"RETURN coalesce(p.name, p.title) AS result, labels(p)[0] AS type\n",
"LIMIT 1\n",
"\"\"\"\n",
"\n",
"\n",
"def map_to_database(entities: Entities) -> Optional[str]:\n",
" result = \"\"\n",
" for entity in entities.names:\n",
" response = graph.query(match_query, {\"value\": entity})\n",
" try:\n",
" result += f\"{entity} maps to {response[0]['result']} {response[0]['type']} in database\\n\"\n",
" except IndexError:\n",
" pass\n",
" return result\n",
"\n",
"\n",
"map_to_database(entities)"
]
},
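{
"cell_type": "markdown",
"id": "b7f1c2d3-4e5f-4a6b-8c7d-9e0f1a2b3c4d",
"metadata": {},
"source": [
"As a sketch of that alternative (assuming a Neo4j 5+ database; the index name `entity_index` is illustrative), we could create a fulltext index over the relevant properties and query it instead of using `CONTAINS`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d4c3b2a1-0f9e-4d8c-b76a-5f4e3d2c1b0a",
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch (not run above): create a fulltext index and query it.\n",
"# Lucene fuzzy syntax ('term~') tolerates minor misspellings.\n",
"graph.query(\n",
"    \"CREATE FULLTEXT INDEX entity_index IF NOT EXISTS \"\n",
"    \"FOR (n:Person|Movie) ON EACH [n.name, n.title]\"\n",
")\n",
"\n",
"fulltext_query = \"\"\"CALL db.index.fulltext.queryNodes('entity_index', $value)\n",
"YIELD node, score\n",
"RETURN coalesce(node.name, node.title) AS result, labels(node)[0] AS type\n",
"LIMIT 1\n",
"\"\"\"\n",
"\n",
"# graph.query(fulltext_query, {\"value\": \"Casino~\"})  # '~' enables fuzzy matching"
]
},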
{
"cell_type": "markdown",
"id": "f66c6756-6efb-4b1e-9b5d-87ed914a5212",
"metadata": {},
"source": [
"## Custom Cypher generating chain\n",
"\n",
"We need to define a custom Cypher prompt that takes the entity mapping information along with the schema and the user question to construct a Cypher statement.\n",
"We will be using the LangChain expression language to accomplish that."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "8ef3e21d-f1c2-45e2-9511-4920d1cf6e7e",
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.runnables import RunnablePassthrough\n",
"\n",
"# Generate Cypher statement based on natural language input\n",
"cypher_template = \"\"\"Based on the Neo4j graph schema below, write a Cypher query that would answer the user's question:\n",
"{schema}\n",
"Entities in the question map to the following database values:\n",
"{entities_list}\n",
"Question: {question}\n",
"Cypher query:\"\"\"\n",
"\n",
"cypher_prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\n",
" \"system\",\n",
" \"Given an input question, convert it to a Cypher query. No pre-amble.\",\n",
" ),\n",
" (\"human\", cypher_template),\n",
" ]\n",
")\n",
"\n",
"cypher_response = (\n",
" RunnablePassthrough.assign(names=entity_chain)\n",
" | RunnablePassthrough.assign(\n",
" entities_list=lambda x: map_to_database(x[\"names\"]),\n",
" schema=lambda _: graph.get_schema,\n",
" )\n",
" | cypher_prompt\n",
" | llm.bind(stop=[\"\\nCypherResult:\"])\n",
" | StrOutputParser()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "1f0011e3-9660-4975-af2a-486b1bc3b954",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'MATCH (:Movie {title: \"Casino\"})<-[:ACTED_IN]-(actor)\\nRETURN actor.name'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cypher = cypher_response.invoke({\"question\": \"Who played in Casino movie?\"})\n",
"cypher"
]
},
{
"cell_type": "markdown",
"id": "38095678-611f-4847-a4de-e51ef7ef727c",
"metadata": {},
"source": [
"## Generating answers based on database results\n",
"\n",
"Now that we have a chain that generates the Cypher statement, we need to execute the Cypher statement against the database and send the database results back to an LLM to generate the final answer.\n",
"Again, we will be using LCEL."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "d1fa97c0-1c9c-41d3-9ee1-5f1905d17434",
"metadata": {},
"outputs": [],
"source": [
"from langchain_neo4j.chains.graph_qa.cypher_utils import (\n",
" CypherQueryCorrector,\n",
" Schema,\n",
")\n",
"\n",
"graph.refresh_schema()\n",
"# Cypher validation tool for relationship directions\n",
"corrector_schema = [\n",
" Schema(el[\"start\"], el[\"type\"], el[\"end\"])\n",
" for el in graph.structured_schema.get(\"relationships\")\n",
"]\n",
"cypher_validation = CypherQueryCorrector(corrector_schema)\n",
"\n",
"# Generate natural language response based on database results\n",
"response_template = \"\"\"Based on the the question, Cypher query, and Cypher response, write a natural language response:\n",
"Question: {question}\n",
"Cypher query: {query}\n",
"Cypher Response: {response}\"\"\"\n",
"\n",
"response_prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\n",
" \"system\",\n",
" \"Given an input question and Cypher response, convert it to a natural\"\n",
" \" language answer. No pre-amble.\",\n",
" ),\n",
" (\"human\", response_template),\n",
" ]\n",
")\n",
"\n",
"chain = (\n",
" RunnablePassthrough.assign(query=cypher_response)\n",
" | RunnablePassthrough.assign(\n",
" response=lambda x: graph.query(cypher_validation(x[\"query\"])),\n",
" )\n",
" | response_prompt\n",
" | llm\n",
" | StrOutputParser()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "918146e5-7918-46d2-a774-53f9547d8fcb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Robert De Niro, James Woods, Joe Pesci, and Sharon Stone played in the movie \"Casino\".'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.invoke({\"question\": \"Who played in Casino movie?\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7ba75cd-8399-4e54-a6f8-8a411f159f56",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,548 @@
{
"cells": [
{
"cell_type": "raw",
"metadata": {},
"source": [
"---\n",
"sidebar_position: 2\n",
"---"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# How to best prompt for Graph-RAG\n",
"\n",
"In this guide we'll go over prompting strategies to improve graph database query generation. We'll largely focus on methods for getting relevant database-specific information in your prompt.\n",
"\n",
"## Setup\n",
"\n",
"First, get required packages and set environment variables:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade --quiet langchain langchain-neo4j langchain-openai neo4j"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We default to OpenAI models in this guide, but you can swap them out for the model provider of your choice."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
" ········\n"
]
}
],
"source": [
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()\n",
"\n",
"# Uncomment the below to use LangSmith. Not required.\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n",
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we need to define Neo4j credentials.\n",
"Follow [these installation steps](https://neo4j.com/docs/operations-manual/current/installation/) to set up a Neo4j database."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"NEO4J_URI\"] = \"bolt://localhost:7687\"\n",
"os.environ[\"NEO4J_USERNAME\"] = \"neo4j\"\n",
"os.environ[\"NEO4J_PASSWORD\"] = \"password\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The below example will create a connection with a Neo4j database and will populate it with example data about movies and their actors."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_neo4j import Neo4jGraph\n",
"\n",
"graph = Neo4jGraph()\n",
"\n",
"# Import movie information\n",
"\n",
"movies_query = \"\"\"\n",
"LOAD CSV WITH HEADERS FROM \n",
"'https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/movies/movies_small.csv'\n",
"AS row\n",
"MERGE (m:Movie {id:row.movieId})\n",
"SET m.released = date(row.released),\n",
" m.title = row.title,\n",
" m.imdbRating = toFloat(row.imdbRating)\n",
"FOREACH (director in split(row.director, '|') | \n",
" MERGE (p:Person {name:trim(director)})\n",
" MERGE (p)-[:DIRECTED]->(m))\n",
"FOREACH (actor in split(row.actors, '|') | \n",
" MERGE (p:Person {name:trim(actor)})\n",
" MERGE (p)-[:ACTED_IN]->(m))\n",
"FOREACH (genre in split(row.genres, '|') | \n",
" MERGE (g:Genre {name:trim(genre)})\n",
" MERGE (m)-[:IN_GENRE]->(g))\n",
"\"\"\"\n",
"\n",
"graph.query(movies_query)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Filtering graph schema\n",
"\n",
"At times, you may need to focus on a specific subset of the graph schema while generating Cypher statements.\n",
"Let's say we are dealing with the following graph schema:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Node properties are the following:\n",
"Movie {imdbRating: FLOAT, id: STRING, released: DATE, title: STRING},Person {name: STRING},Genre {name: STRING}\n",
"Relationship properties are the following:\n",
"\n",
"The relationships are the following:\n",
"(:Movie)-[:IN_GENRE]->(:Genre),(:Person)-[:DIRECTED]->(:Movie),(:Person)-[:ACTED_IN]->(:Movie)\n"
]
}
],
"source": [
"graph.refresh_schema()\n",
"print(graph.schema)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's say we want to exclude the _Genre_ node from the schema representation we pass to an LLM.\n",
"We can achieve that using the `exclude` parameter of the GraphCypherQAChain chain."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from langchain_neo4j import GraphCypherQAChain\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
"chain = GraphCypherQAChain.from_llm(\n",
" graph=graph,\n",
" llm=llm,\n",
" exclude_types=[\"Genre\"],\n",
" verbose=True,\n",
" allow_dangerous_requests=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Node properties are the following:\n",
"Movie {imdbRating: FLOAT, id: STRING, released: DATE, title: STRING},Person {name: STRING}\n",
"Relationship properties are the following:\n",
"\n",
"The relationships are the following:\n",
"(:Person)-[:DIRECTED]->(:Movie),(:Person)-[:ACTED_IN]->(:Movie)\n"
]
}
],
"source": [
"print(chain.graph_schema)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Few-shot examples\n",
"\n",
"Including examples of natural language questions being converted to valid Cypher queries against our database in the prompt will often improve model performance, especially for complex queries.\n",
"\n",
"Let's say we have the following examples:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"examples = [\n",
" {\n",
" \"question\": \"How many artists are there?\",\n",
" \"query\": \"MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)\",\n",
" },\n",
" {\n",
" \"question\": \"Which actors played in the movie Casino?\",\n",
" \"query\": \"MATCH (m:Movie {{title: 'Casino'}})<-[:ACTED_IN]-(a) RETURN a.name\",\n",
" },\n",
" {\n",
" \"question\": \"How many movies has Tom Hanks acted in?\",\n",
" \"query\": \"MATCH (a:Person {{name: 'Tom Hanks'}})-[:ACTED_IN]->(m:Movie) RETURN count(m)\",\n",
" },\n",
" {\n",
" \"question\": \"List all the genres of the movie Schindler's List\",\n",
" \"query\": \"MATCH (m:Movie {{title: 'Schindler\\\\'s List'}})-[:IN_GENRE]->(g:Genre) RETURN g.name\",\n",
" },\n",
" {\n",
" \"question\": \"Which actors have worked in movies from both the comedy and action genres?\",\n",
" \"query\": \"MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name\",\n",
" },\n",
" {\n",
" \"question\": \"Which directors have made movies with at least three different actors named 'John'?\",\n",
" \"query\": \"MATCH (d:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH d, COUNT(DISTINCT a) AS JohnsCount WHERE JohnsCount >= 3 RETURN d.name\",\n",
" },\n",
" {\n",
" \"question\": \"Identify movies where directors also played a role in the film.\",\n",
" \"query\": \"MATCH (p:Person)-[:DIRECTED]->(m:Movie), (p)-[:ACTED_IN]->(m) RETURN m.title, p.name\",\n",
" },\n",
" {\n",
" \"question\": \"Find the actor with the highest number of movies in the database.\",\n",
" \"query\": \"MATCH (a:Actor)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1\",\n",
" },\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can create a few-shot prompt with them like so:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate\n",
"\n",
"example_prompt = PromptTemplate.from_template(\n",
" \"User input: {question}\\nCypher query: {query}\"\n",
")\n",
"prompt = FewShotPromptTemplate(\n",
" examples=examples[:5],\n",
" example_prompt=example_prompt,\n",
" prefix=\"You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\\n\\nHere is the schema information\\n{schema}.\\n\\nBelow are a number of examples of questions and their corresponding Cypher queries.\",\n",
" suffix=\"User input: {question}\\nCypher query: \",\n",
" input_variables=[\"question\", \"schema\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n",
"\n",
"Here is the schema information\n",
"foo.\n",
"\n",
"Below are a number of examples of questions and their corresponding Cypher queries.\n",
"\n",
"User input: How many artists are there?\n",
"Cypher query: MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)\n",
"\n",
"User input: Which actors played in the movie Casino?\n",
"Cypher query: MATCH (m:Movie {title: 'Casino'})<-[:ACTED_IN]-(a) RETURN a.name\n",
"\n",
"User input: How many movies has Tom Hanks acted in?\n",
"Cypher query: MATCH (a:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie) RETURN count(m)\n",
"\n",
"User input: List all the genres of the movie Schindler's List\n",
"Cypher query: MATCH (m:Movie {title: 'Schindler\\'s List'})-[:IN_GENRE]->(g:Genre) RETURN g.name\n",
"\n",
"User input: Which actors have worked in movies from both the comedy and action genres?\n",
"Cypher query: MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name\n",
"\n",
"User input: How many artists are there?\n",
"Cypher query: \n"
]
}
],
"source": [
"print(prompt.format(question=\"How many artists are there?\", schema=\"foo\"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dynamic few-shot examples\n",
"\n",
"If we have enough examples, we may want to only include the most relevant ones in the prompt, either because they don't fit in the model's context window or because the long tail of examples distracts the model. And specifically, given any input we want to include the examples most relevant to that input.\n",
"\n",
"We can do just this using an ExampleSelector. In this case we'll use a [SemanticSimilarityExampleSelector](https://python.langchain.com/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html), which will store the examples in the vector database of our choosing. At runtime it will perform a similarity search between the input and our examples, and return the most semantically similar ones: "
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.example_selectors import SemanticSimilarityExampleSelector\n",
"from langchain_neo4j import Neo4jVector\n",
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"example_selector = SemanticSimilarityExampleSelector.from_examples(\n",
" examples,\n",
" OpenAIEmbeddings(),\n",
" Neo4jVector,\n",
" k=5,\n",
" input_keys=[\"question\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'query': 'MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)',\n",
" 'question': 'How many artists are there?'},\n",
" {'query': \"MATCH (a:Person {{name: 'Tom Hanks'}})-[:ACTED_IN]->(m:Movie) RETURN count(m)\",\n",
" 'question': 'How many movies has Tom Hanks acted in?'},\n",
" {'query': \"MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name\",\n",
" 'question': 'Which actors have worked in movies from both the comedy and action genres?'},\n",
" {'query': \"MATCH (d:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH d, COUNT(DISTINCT a) AS JohnsCount WHERE JohnsCount >= 3 RETURN d.name\",\n",
" 'question': \"Which directors have made movies with at least three different actors named 'John'?\"},\n",
" {'query': 'MATCH (a:Actor)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1',\n",
" 'question': 'Find the actor with the highest number of movies in the database.'}]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"example_selector.select_examples({\"question\": \"how many artists are there?\"})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To use it, we can pass the ExampleSelector directly in to our FewShotPromptTemplate:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"prompt = FewShotPromptTemplate(\n",
" example_selector=example_selector,\n",
" example_prompt=example_prompt,\n",
" prefix=\"You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\\n\\nHere is the schema information\\n{schema}.\\n\\nBelow are a number of examples of questions and their corresponding Cypher queries.\",\n",
" suffix=\"User input: {question}\\nCypher query: \",\n",
" input_variables=[\"question\", \"schema\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n",
"\n",
"Here is the schema information\n",
"foo.\n",
"\n",
"Below are a number of examples of questions and their corresponding Cypher queries.\n",
"\n",
"User input: How many artists are there?\n",
"Cypher query: MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)\n",
"\n",
"User input: How many movies has Tom Hanks acted in?\n",
"Cypher query: MATCH (a:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie) RETURN count(m)\n",
"\n",
"User input: Which actors have worked in movies from both the comedy and action genres?\n",
"Cypher query: MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name\n",
"\n",
"User input: Which directors have made movies with at least three different actors named 'John'?\n",
"Cypher query: MATCH (d:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH d, COUNT(DISTINCT a) AS JohnsCount WHERE JohnsCount >= 3 RETURN d.name\n",
"\n",
"User input: Find the actor with the highest number of movies in the database.\n",
"Cypher query: MATCH (a:Actor)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1\n",
"\n",
"User input: how many artists are there?\n",
"Cypher query: \n"
]
}
],
"source": [
"print(prompt.format(question=\"how many artists are there?\", schema=\"foo\"))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
"chain = GraphCypherQAChain.from_llm(\n",
" graph=graph,\n",
" llm=llm,\n",
" cypher_prompt=prompt,\n",
" verbose=True,\n",
" allow_dangerous_requests=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
"Generated Cypher:\n",
"\u001b[32;1m\u001b[1;3mMATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)\u001b[0m\n",
"Full Context:\n",
"\u001b[32;1m\u001b[1;3m[{'count(DISTINCT a)': 967}]\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'query': 'How many actors are in the graph?',\n",
" 'result': 'There are 967 actors in the graph.'}"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.invoke(\"How many actors are in the graph?\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -316,7 +316,9 @@ For a high-level tutorial, check out [this guide](/docs/tutorials/sql_qa/).
You can use an LLM to do question answering over graph databases.
For a high-level tutorial, check out [this guide](/docs/tutorials/graph/).
- [How to: map values to a database](/docs/how_to/graph_mapping)
- [How to: add a semantic layer over the database](/docs/how_to/graph_semantic)
- [How to: improve results with prompting](/docs/how_to/graph_prompting)
- [How to: construct knowledge graphs](/docs/how_to/graph_constructing)
### Summarization

View File

@@ -12,7 +12,7 @@
"There are two ways to implement a custom parser:\n",
"\n",
"1. Using `RunnableLambda` or `RunnableGenerator` in [LCEL](/docs/concepts/lcel/) -- we strongly recommend this for most use cases\n",
"2. By inheriting from one of the base classes for out parsing -- this is the hard way of doing things\n",
"2. By inherting from one of the base classes for out parsing -- this is the hard way of doing things\n",
"\n",
"The difference between the two approaches are mostly superficial and are mainly in terms of which callbacks are triggered (e.g., `on_chain_start` vs. `on_parser_start`), and how a runnable lambda vs. a parser might be visualized in a tracing platform like LangSmith."
]
@@ -200,7 +200,7 @@
"id": "24067447-8a5a-4d6b-86a3-4b9cc4b4369b",
"metadata": {},
"source": [
"## Inheriting from Parsing Base Classes"
"## Inherting from Parsing Base Classes"
]
},
{
@@ -208,7 +208,7 @@
"id": "9713f547-b2e4-48eb-807f-a0f6f6d0e7e0",
"metadata": {},
"source": [
"Another approach to implement a parser is by inheriting from `BaseOutputParser`, `BaseGenerationOutputParser` or another one of the base parsers depending on what you need to do.\n",
"Another approach to implement a parser is by inherting from `BaseOutputParser`, `BaseGenerationOutputParser` or another one of the base parsers depending on what you need to do.\n",
"\n",
"In general, we **do not** recommend this approach for most use cases as it results in more code to write without significant benefits.\n",
"\n",

View File

@@ -55,7 +55,7 @@
"* Run `.read Chinook_Sqlite.sql`\n",
"* Test `SELECT * FROM Artist LIMIT 10;`\n",
"\n",
"Now, `Chinook.db` is in our directory and we can interface with it using the SQLAlchemy-driven [SQLDatabase](https://python.langchain.com/api_reference/community/utilities/langchain_community.utilities.sql_database.SQLDatabase.html) class:"
"Now, `Chinhook.db` is in our directory and we can interface with it using the SQLAlchemy-driven [SQLDatabase](https://python.langchain.com/api_reference/community/utilities/langchain_community.utilities.sql_database.SQLDatabase.html) class:"
]
},
{

View File

@@ -51,7 +51,7 @@
"* Run `.read Chinook_Sqlite.sql`\n",
"* Test `SELECT * FROM Artist LIMIT 10;`\n",
"\n",
"Now, `Chinook.db` is in our directory and we can interface with it using the SQLAlchemy-driven `SQLDatabase` class:"
"Now, `Chinhook.db` is in our directory and we can interface with it using the SQLAlchemy-driven `SQLDatabase` class:"
]
},
{

View File

@@ -54,7 +54,7 @@
"* Run `.read Chinook_Sqlite.sql`\n",
"* Test `SELECT * FROM Artist LIMIT 10;`\n",
"\n",
"Now, `Chinook.db` is in our directory and we can interface with it using the SQLAlchemy-driven `SQLDatabase` class:"
"Now, `Chinhook.db` is in our directory and we can interface with it using the SQLAlchemy-driven `SQLDatabase` class:"
]
},
{

View File

@@ -336,7 +336,7 @@
"\n",
"The **MultiQueryRetriever** is used to tackle the problem that the RAG pipeline might not return the best set of documents based on the query. It generates multiple queries that mean the same as the original query and then fetches documents for each.\n",
"\n",
"To evaluate this retriever, UpTrain will run the following evaluation:\n",
"To evluate this retriever, UpTrain will run the following evaluation:\n",
"- **[Multi Query Accuracy](https://docs.uptrain.ai/predefined-evaluations/query-quality/multi-query-accuracy)**: Checks if the multi-queries generated mean the same as the original query."
]
},

View File

@@ -36,7 +36,7 @@
"### Integration details\n",
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/docs/integrations/chat/ibm/) | Package downloads | Package latest |\n",
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
"| [ChatWatsonx](https://python.langchain.com/api_reference/ibm/chat_models/langchain_ibm.chat_models.ChatWatsonx.html) | [langchain-ibm](https://python.langchain.com/api_reference/ibm/index.html) | ❌ | ❌ | ✅ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-ibm?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-ibm?style=flat-square&label=%20) |\n",
"| [ChatWatsonx](https://python.langchain.com/api_reference/ibm/chat_models/langchain_ibm.chat_models.ChatWatsonx.html#langchain_ibm.chat_models.ChatWatsonx) | [langchain-ibm](https://python.langchain.com/api_reference/ibm/index.html) | ❌ | ❌ | ✅ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-ibm?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-ibm?style=flat-square&label=%20) |\n",
"\n",
"### Model features\n",
"| [Tool calling](/docs/how_to/tool_calling/) | [Structured output](/docs/how_to/structured_output/) | JSON mode | Image input | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",

File diff suppressed because it is too large Load Diff

View File

@@ -1,39 +0,0 @@
# Linkup
> [Linkup](https://www.linkup.so/) provides an API to connect LLMs to the web and the Linkup Premium Partner sources.
## Installation and Setup
To use the Linkup provider, you first need a valid API key, which you can find by signing-up [here](https://app.linkup.so/sign-up).
You will also need the `langchain-linkup` package, which you can install using pip:
```bash
pip install langchain-linkup
```
## Retriever
See a [usage example](/docs/integrations/retrievers/linkup_search).
```python
from langchain_linkup import LinkupSearchRetriever
retriever = LinkupSearchRetriever(
depth="deep", # "standard" or "deep"
linkup_api_key=None, # API key can be passed here or set as the LINKUP_API_KEY environment variable
)
```
## Tools
See a [usage example](/docs/integrations/tools/linkup_search).
```python
from langchain_linkup import LinkupSearchTool
tool = LinkupSearchTool(
depth="deep", # "standard" or "deep"
output_type="searchResults", # "searchResults", "sourcedAnswer" or "structured"
linkup_api_key=None, # API key can be passed here or set as the LINKUP_API_KEY environment variable
)
```

View File

@@ -343,31 +343,6 @@ See a [usage example](/docs/integrations/memory/postgres_chat_message_history/).
Since Azure Database for PostgreSQL is open-source Postgres, you can use the [LangChain's Postgres support](/docs/integrations/vectorstores/pgvector/) to connect to Azure Database for PostgreSQL.
### Azure SQL Database
>[Azure SQL Database](https://learn.microsoft.com/azure/azure-sql/database/sql-database-paas-overview?view=azuresql) is a robust service that combines scalability, security, and high availability, providing all the benefits of a modern database solution. It also provides a dedicated Vector data type & built-in functions that simplifies the storage and querying of vector embeddings directly within a relational database. This eliminates the need for separate vector databases and related integrations, increasing the security of your solutions while reducing the overall complexity.
By leveraging your current SQL Server databases for vector search, you can enhance data capabilities while minimizing expenses and avoiding the challenges of transitioning to new systems.
##### Installation and Setup
See [detail configuration instructions](/docs/integrations/vectorstores/sqlserver).
We need to install the `langchain-sqlserver` python package.
```bash
!pip install langchain-sqlserver==0.1.1
```
##### Deploy Azure SQL DB on Microsoft Azure
[Sign Up](https://learn.microsoft.com/azure/azure-sql/database/free-offer?view=azuresql) for free to get started today.
See a [usage example](/docs/integrations/vectorstores/sqlserver).
```python
from langchain_sqlserver import SQLServer_VectorStore
```
### Azure AI Search

View File

@@ -1,41 +0,0 @@
# ScrapeGraph AI
>[ScrapeGraph AI](https://scrapegraphai.com) is a service that provides AI-powered web scraping capabilities.
>It offers tools for extracting structured data, converting webpages to markdown, and processing local HTML content
>using natural language prompts.
## Installation and Setup
Install the required packages:
```bash
pip install langchain-scrapegraph
```
Set up your API key:
```bash
export SGAI_API_KEY="your-scrapegraph-api-key"
```
## Tools
See a [usage example](/docs/integrations/tools/scrapegraph).
There are four tools available:
```python
from langchain_scrapegraph.tools import (
SmartScraperTool, # Extract structured data from websites
MarkdownifyTool, # Convert webpages to markdown
LocalScraperTool, # Process local HTML content
GetCreditsTool, # Check remaining API credits
)
```
Each tool serves a specific purpose:
- `SmartScraperTool`: Extract structured data from websites given a URL, prompt and optional output schema
- `MarkdownifyTool`: Convert any webpage to clean markdown format
- `LocalScraperTool`: Extract structured data from a local HTML file given a prompt and optional output schema
- `GetCreditsTool`: Check your remaining ScrapeGraph AI credits

View File

@@ -8,7 +8,7 @@
"\n",
">[Upstage](https://upstage.ai) is a leading artificial intelligence (AI) company specializing in delivering above-human-grade performance LLM components.\n",
">\n",
">**Solar Pro** is an enterprise-grade LLM optimized for single-GPU deployment, excelling in instruction-following and processing structured formats like HTML and Markdown. It supports English, Korean, and Japanese with top multilingual performance and offers domain expertise in finance, healthcare, and legal.\n",
">**Solar Mini Chat** is a fast yet powerful advanced large language model focusing on English and Korean. It has been specifically fine-tuned for multi-turn chat purposes, showing enhanced performance across a wide range of natural language processing tasks, like multi-turn conversation or tasks that require an understanding of long contexts, such as RAG (Retrieval-Augmented Generation), compared to other models of a similar size. This fine-tuning equips it with the ability to handle longer conversations more effectively, making it particularly adept for interactive applications.\n",
"\n",
">Other than Solar, Upstage also offers features for real-world RAG (retrieval-augmented generation), such as **Document Parse** and **Groundedness Check**. \n"
]
@@ -21,12 +21,12 @@
"\n",
"| API | Description | Import | Example usage |\n",
"| --- | --- | --- | --- |\n",
"| Chat | Build assistants using Solar Chat | `from langchain_upstage import ChatUpstage` | [Go](../../chat/upstage) |\n",
"| Chat | Build assistants using Solar Mini Chat | `from langchain_upstage import ChatUpstage` | [Go](../../chat/upstage) |\n",
"| Text Embedding | Embed strings to vectors | `from langchain_upstage import UpstageEmbeddings` | [Go](../../text_embedding/upstage) |\n",
"| Groundedness Check | Verify groundedness of assistant's response | `from langchain_upstage import UpstageGroundednessCheck` | [Go](../../tools/upstage_groundedness_check) |\n",
"| Document Parse | Serialize documents with tables and figures | `from langchain_upstage import UpstageDocumentParseLoader` | [Go](../../document_loaders/upstage) |\n",
"\n",
"See [documentations](https://console.upstage.ai/docs/getting-started/overview) for more details about the models and features."
"See [documentations](https://developers.upstage.ai/) for more details about the features."
]
},
{

View File

@@ -35,9 +35,9 @@
"\n",
"### Integration details\n",
"\n",
"| Class | Package | [JS support](https://js.langchain.com/docs/integrations/document_compressors/ibm/) | Package downloads | Package latest |\n",
"| Class | Package | JS support | Package downloads | Package latest |\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [WatsonxRerank](https://python.langchain.com/api_reference/ibm/rerank/langchain_ibm.rerank.WatsonxRerank.html) | [langchain-ibm](https://python.langchain.com/api_reference/ibm/index.html) | | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-ibm?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-ibm?style=flat-square&label=%20) |"
"| [WatsonxRerank](https://python.langchain.com/api_reference/ibm/chat_models/langchain_ibm.rerank.WatsonxRerank.html) | [langchain-ibm](https://python.langchain.com/api_reference/ibm/index.html) | | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-ibm?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-ibm?style=flat-square&label=%20) |"
]
},
{
@@ -445,7 +445,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "langchain_ibm",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},

View File

@@ -1,270 +0,0 @@
{
"cells": [
{
"cell_type": "raw",
"id": "afaf8039",
"metadata": {},
"source": [
"---\n",
"sidebar_label: LinkupSearchRetriever\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "e49f1e0d",
"metadata": {},
"source": [
"# LinkupSearchRetriever\n",
"\n",
"> [Linkup](https://www.linkup.so/) provides an API to connect LLMs to the web and the Linkup Premium Partner sources.\n",
"\n",
"This will help you getting started with the LinkupSearchRetriever [retriever](/docs/concepts/retrievers/). For detailed documentation of all LinkupSearchRetriever features and configurations head to the [API reference](https://python.langchain.com/api_reference/linkup/retrievers/linkup_langchain.search_retriever.LinkupSearchRetriever.html).\n",
"\n",
"### Integration details\n",
"\n",
"| Retriever | Source | Package |\n",
"| :--- | :--- | :---: |\n",
"[LinkupSearchRetriever](https://python.langchain.com/api_reference/linkup/retrievers/linkup_langchain.search_retriever.LinkupSearchRetriever.html) | Web and partner sources | langchain-linkup |\n",
"\n",
"## Setup\n",
"\n",
"To use the Linkup provider, you need a valid API key, which you can find by signing-up [here](https://app.linkup.so/sign-up). You can then set it up as the `LINKUP_API_KEY` environment variable. For the chain example below, you also need to set an OpenAI API key as `OPENAI_API_KEY` environment variable, which you can also do here:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0c6cab32-8f55-473d-b5bc-72673ea4da61",
"metadata": {},
"outputs": [],
"source": [
"# import os\n",
"# os.environ[\"LINKUP_API_KEY\"] = \"\" # Fill with your API key\n",
"# os.environ[\"OPENAI_API_KEY\"] = \"\" # Fill with your API key"
]
},
{
"cell_type": "markdown",
"id": "72ee0c4b-9764-423a-9dbf-95129e185210",
"metadata": {},
"source": [
"If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a15d341e-3e26-4ca3-830b-5aab30ed66de",
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
"cell_type": "markdown",
"id": "0730d6a1-c893-4840-9817-5e5251676d5d",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"This retriever lives in the `langchain-linkup` package:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain-linkup"
]
},
{
"cell_type": "markdown",
"id": "a38cde65-254d-4219-a441-068766c0d4b5",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"Now we can instantiate our retriever:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70cc8e65-2a02-408a-bbc6-8ef649057d82",
"metadata": {},
"outputs": [],
"source": [
"from langchain_linkup import LinkupSearchRetriever\n",
"\n",
"retriever = LinkupSearchRetriever(\n",
" depth=\"deep\", # \"standard\" or \"deep\"\n",
" linkup_api_key=None, # API key can be passed here or set as the LINKUP_API_KEY environment variable\n",
")"
]
},
{
"cell_type": "markdown",
"id": "5c5f2839-4020-424e-9fc9-07777eede442",
"metadata": {},
"source": [
"## Usage"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "51a60dbe-9f2e-4e04-bb62-23968f17164a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'name': 'US presidential election results 2024: Harris vs. Trump | Live maps ...', 'url': 'https://www.reuters.com/graphics/USA-ELECTION/RESULTS/zjpqnemxwvx/'}, page_content='Updated results from the 2024 election for the US president. Reuters live coverage of the 2024 US President, Senate, House and state governors races.'),\n",
" Document(metadata={'name': 'Election 2024: Presidential results - CNN', 'url': 'https://www.cnn.com/election/2024/results/president'}, page_content='View maps and real-time results for the 2024 US presidential election matchup between former President Donald Trump and Vice President Kamala Harris. For more ...'),\n",
" Document(metadata={'name': 'Presidential Election 2024 Live Results: Donald Trump wins - NBC News', 'url': 'https://www.nbcnews.com/politics/2024-elections/president-results'}, page_content='View live election results from the 2024 presidential race as Kamala Harris and Donald Trump face off. See the map of votes by state as results are tallied.'),\n",
" Document(metadata={'name': '2024 President Election - Live Results | RealClearPolitics', 'url': 'https://www.realclearpolitics.com/elections/live_results/2024/president/'}, page_content='Latest Election 2024 Results • President • United States • Tuesday November 3rd • Presidential Election Details'),\n",
" Document(metadata={'name': 'Live: Presidential Election Results 2024 : NPR', 'url': 'https://apps.npr.org/2024-election-results/'}, page_content='Presidential race ratings are based on NPR analysis. Maps do not shade in until 50% of the estimated vote is in for a given state, to mitigate flutuations in early returns . 2024 General Election Results'),\n",
" Document(metadata={'name': '2024 US Presidential Election Results: Live Map - Bloomberg.com', 'url': 'https://www.bloomberg.com/graphics/2024-us-election-results/'}, page_content='US Presidential Election Results November 5, 2024. Bloomberg News is reporting live election results in the presidential race between Democratic Vice President Kamala Harris and her Republican ...'),\n",
" Document(metadata={'name': 'Presidential Election Results 2024: Electoral Votes & Map by State ...', 'url': 'https://www.politico.com/2024-election/results/president/'}, page_content='Live 2024 Presidential election results, maps and electoral votes by state. POLITICOs real-time coverage of 2024 races for President, Senate, House and Governor.'),\n",
" Document(metadata={'name': 'US Presidential Election Results 2024 - BBC News', 'url': 'https://www.bbc.com/news/election/2024/us/results'}, page_content='Kamala Harris of the Democrat party has 74,498,303 votes (48.3%) Donald Trump of the Republican party has 76,989,499 votes (49.9%) This map of the US states was filled in as presidential results ...'),\n",
" Document(metadata={'name': 'Election Results 2024: Live Map - Races by State - POLITICO', 'url': 'https://www.politico.com/2024-election/results/'}, page_content='Live 2024 election results and maps by state. POLITICOs real-time coverage of 2024 races for President, Senate, House and Governor.'),\n",
" Document(metadata={'name': '2024 U.S. Presidential Election: Live Results and Maps - USA TODAY', 'url': 'https://www.usatoday.com/elections/results/2024-11-05/president'}, page_content='See who is winning in the Nov. 5, 2024 U.S. Presidential election nationwide with real-time results and state-by-state maps.'),\n",
" Document(metadata={'name': 'Presidential Election 2024 Live Results: Donald Trump winsNBC News LogoSearchSearchNBC News LogoMSNBC LogoToday Logo', 'url': 'https://www.nbcnews.com/politics/2024-elections/president-results'}, page_content=\"Profile\\n\\nSections\\n\\nLocal\\n\\ntv\\n\\nFeatured\\n\\nMore From NBC\\n\\nFollow NBC News\\n\\nnews Alerts\\n\\nThere are no new alerts at this time\\n\\n2024 President Results: Trump wins\\n==================================\\n\\nDonald Trump has secured more than the 270 Electoral College votes needed to secure the presidency, NBC News projects.\\n\\nRaces to watch\\n--------------\\n\\nAll Presidential races\\n----------------------\\n\\nElection Night Coverage\\n-----------------------\\n\\n### China competition should be top priority for Trump, Sullivan says, as Biden and Xi prepare for final meeting\\n\\n### Jim Himes says 'truth and analysis are not what drive Gabbard and Gaetz\\n\\n### Trump praises RFK Jr. in Mar-a-Lago remarks\\n\\n### Trump announces North Dakota Gov. Doug Burgum as his pick for interior secretary\\n\\n### House Ethics Committee cancels meeting at which Gaetz probe was on the agenda\\n\\n### Trump picks former Rep. Doug Collins for veterans affairs secretary\\n\\n### Trump to nominate his criminal defense lawyer for deputy attorney general\\n\\n### From brilliant to dangerous: Mixed reactions roll in after Trump picks RFK Jr. for top health post\\n\\n### Donald Trump Jr. says he played key role in RFK Jr., Tulsi Gabbard picks\\n\\n### Jared Polis offers surprising words of support for RFK Jr. pick for HHS secretary\\n\\nNational early voting\\n---------------------\\n\\n### 88,233,886 mail-in and early in-person votes cast nationally\\n\\n### 65,676,748 mail-in and early in-person votes requested nationally\\n\\nPast Presidential Elections\\n---------------------------\\n\\n### Vote Margin by State in the 2020 Presidential Election\\n\\nCircle size represents the number electoral votes in that state.\\n\\nThe expected vote is the total number of votes that are expected in a given race once all votes are counted. This number is an estimate and is based on several different factors, including information on the number of votes cast early as well as information provided to our vote reporters on Election Day from county election officials. The figure can change as NBC News gathers new information.\\n\\n**Source**: [National Election Pool (NEP)](https://www.nbcnews.com/politics/2024-elections/how-election-data-is-collected )\\n\\n2024 election results\\n---------------------\\n\\nElection Night Coverage\\n-----------------------\\n\\n### China competition should be top priority for Trump, Sullivan says, as Biden and Xi prepare for final meeting\\n\\n### Jim Himes says 'truth and analysis are not what drive Gabbard and Gaetz\\n\\n### Trump praises RFK Jr. in Mar-a-Lago remarks\\n\\n©\\xa02024 NBCUniversal Media, LLC\")]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query = \"Who won the latest US presidential elections?\"\n",
"\n",
"retriever.invoke(query)"
]
},
{
"cell_type": "markdown",
"id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e",
"metadata": {},
"source": [
"## Use within a chain\n",
"\n",
"Like other retrievers, LinkupSearchRetriever can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n",
"\n",
"We will need a LLM or chat model:\n",
"\n",
"```{=mdx}\n",
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
"\n",
"<ChatModelTabs customVarName=\"llm\" />\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "25b647a3-f8f2-4541-a289-7a241e43f9df",
"metadata": {},
"outputs": [],
"source": [
"# | output: false\n",
"# | echo: false\n",
"\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae",
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.runnables import RunnablePassthrough\n",
"\n",
"prompt = ChatPromptTemplate.from_template(\n",
" \"\"\"Answer the question based only on the context provided.\n",
"\n",
"Context: {context}\n",
"\n",
"Question: {question}\"\"\"\n",
")\n",
"\n",
"\n",
"def format_docs(docs):\n",
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
"\n",
"\n",
"chain = (\n",
" {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
" | prompt\n",
" | llm\n",
" | StrOutputParser()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'The 3 latest US presidential elections were won by Joe Biden in 2020, Donald Trump in 2016, and Barack Obama in 2012.'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.invoke(\"Who won the 3 latest US presidential elections?\")"
]
},
{
"cell_type": "markdown",
"id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all LinkupSearchRetriever features and configurations head to the [API reference](https://python.langchain.com/api_reference/linkup/retrievers/linkup_langchain.search_retriever.LinkupSearchRetriever.html)."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -194,7 +194,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -146,7 +146,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -164,7 +164,7 @@
},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -185,7 +185,7 @@
},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_community.llms import Tongyi\n",
"\n",

View File

@@ -282,7 +282,7 @@
},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -196,7 +196,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -125,7 +125,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_community.query_constructors.hanavector import HanaTranslator\n",
"from langchain_openai import ChatOpenAI\n",

View File

@@ -119,7 +119,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -160,7 +160,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -165,7 +165,7 @@
},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import ChatOpenAI\n",
"\n",

View File

@@ -168,7 +168,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -135,7 +135,7 @@
},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -141,7 +141,7 @@
},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -190,7 +190,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -144,7 +144,7 @@
},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -194,7 +194,7 @@
},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -308,7 +308,7 @@
},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -218,7 +218,7 @@
},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import ChatOpenAI\n",
"\n",

View File

@@ -249,7 +249,7 @@
},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -91,7 +91,7 @@
"os.environ[\"VECTARA_CORPUS_ID\"] = \"<YOUR_VECTARA_CORPUS_ID>\"\n",
"os.environ[\"VECTARA_CUSTOMER_ID\"] = \"<YOUR_VECTARA_CUSTOMER_ID>\"\n",
"\n",
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_community.vectorstores import Vectara\n",
"from langchain_openai.chat_models import ChatOpenAI"

View File

@@ -115,7 +115,7 @@
},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import OpenAI\n",
"\n",

View File

@@ -0,0 +1,297 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "ce0f17b9",
"metadata": {},
"source": [
"# Weaviate Hybrid Search\n",
"\n",
">[Weaviate](https://weaviate.io/developers/weaviate) is an open-source vector database.\n",
"\n",
[Hybrid search]">
">[Hybrid search](https://weaviate.io/blog/hybrid-search-explained) is a technique that combines multiple search algorithms to improve the accuracy and relevance of search results. It pairs the strengths of keyword-based search algorithms with vector search techniques.\n",
"\n",
The ">
">Hybrid search in Weaviate uses sparse and dense vectors to represent the meaning and context of search queries and documents.\n",
"\n",
"This notebook shows how to use `Weaviate hybrid search` as a LangChain retriever."
]
},
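Before the Weaviate-specific setup, the fusion idea itself can be sketched in a few lines. The following is a toy illustration of blending normalized sparse (keyword) and dense (vector) scores with an `alpha` weight; the score dictionaries are made-up stand-ins, not Weaviate's actual fusion implementation:

```python
# Toy hybrid-score fusion: blend a sparse (keyword) score and a dense
# (vector) score per document with a weighting factor alpha.
def normalize(scores: dict) -> dict:
    if not scores:
        return {}
    lo, hi = min(scores.values()), max(scores.values())
    span = (hi - lo) or 1.0
    return {doc: (s - lo) / span for doc, s in scores.items()}


def hybrid_scores(bm25: dict, vector: dict, alpha: float = 0.5) -> dict:
    """alpha=0 -> pure keyword ranking, alpha=1 -> pure vector ranking."""
    bm25_n, vector_n = normalize(bm25), normalize(vector)
    docs = set(bm25_n) | set(vector_n)
    return {
        d: (1 - alpha) * bm25_n.get(d, 0.0) + alpha * vector_n.get(d, 0.0)
        for d in docs
    }


# Made-up scores for three documents:
print(hybrid_scores({"doc1": 12.0, "doc2": 3.0}, {"doc1": 0.4, "doc2": 0.9, "doc3": 0.1}))
```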
{
"attachments": {},
"cell_type": "markdown",
"id": "c307b082",
"metadata": {},
"source": [
"Set up the retriever:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "bba863a2-977c-4add-b5f4-bfc33a80eae5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%pip install --upgrade --quiet weaviate-client"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c10dd962",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import weaviate\n",
"\n",
"WEAVIATE_URL = os.getenv(\"WEAVIATE_URL\")\n",
"auth_client_secret = (weaviate.AuthApiKey(api_key=os.getenv(\"WEAVIATE_API_KEY\")),)\n",
"client = weaviate.Client(\n",
" url=WEAVIATE_URL,\n",
" additional_headers={\n",
" \"X-Openai-Api-Key\": os.getenv(\"OPENAI_API_KEY\"),\n",
" },\n",
")\n",
"\n",
"# client.schema.delete_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f47a2bfe",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.retrievers import (\n",
" WeaviateHybridSearchRetriever,\n",
")\n",
"from langchain_core.documents import Document"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f2eff08e",
"metadata": {},
"outputs": [],
"source": [
"retriever = WeaviateHybridSearchRetriever(\n",
" client=client,\n",
" index_name=\"LangChain\",\n",
" text_key=\"text\",\n",
" attributes=[],\n",
" create_schema_if_missing=True,\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "b68debff",
"metadata": {},
"source": [
"Add some data:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "cd8a7b17",
"metadata": {},
"outputs": [],
"source": [
"docs = [\n",
" Document(\n",
" metadata={\n",
" \"title\": \"Embracing The Future: AI Unveiled\",\n",
" \"author\": \"Dr. Rebecca Simmons\",\n",
" },\n",
" page_content=\"A comprehensive analysis of the evolution of artificial intelligence, from its inception to its future prospects. Dr. Simmons covers ethical considerations, potentials, and threats posed by AI.\",\n",
" ),\n",
" Document(\n",
" metadata={\n",
" \"title\": \"Symbiosis: Harmonizing Humans and AI\",\n",
" \"author\": \"Prof. Jonathan K. Sterling\",\n",
" },\n",
" page_content=\"Prof. Sterling explores the potential for harmonious coexistence between humans and artificial intelligence. The book discusses how AI can be integrated into society in a beneficial and non-disruptive manner.\",\n",
" ),\n",
" Document(\n",
" metadata={\"title\": \"AI: The Ethical Quandary\", \"author\": \"Dr. Rebecca Simmons\"},\n",
" page_content=\"In her second book, Dr. Simmons delves deeper into the ethical considerations surrounding AI development and deployment. It is an eye-opening examination of the dilemmas faced by developers, policymakers, and society at large.\",\n",
" ),\n",
" Document(\n",
" metadata={\n",
" \"title\": \"Conscious Constructs: The Search for AI Sentience\",\n",
" \"author\": \"Dr. Samuel Cortez\",\n",
" },\n",
" page_content=\"Dr. Cortez takes readers on a journey exploring the controversial topic of AI consciousness. The book provides compelling arguments for and against the possibility of true AI sentience.\",\n",
" ),\n",
" Document(\n",
" metadata={\n",
" \"title\": \"Invisible Routines: Hidden AI in Everyday Life\",\n",
" \"author\": \"Prof. Jonathan K. Sterling\",\n",
" },\n",
" page_content=\"In his follow-up to 'Symbiosis', Prof. Sterling takes a look at the subtle, unnoticed presence and influence of AI in our everyday lives. It reveals how AI has become woven into our routines, often without our explicit realization.\",\n",
" ),\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3c5970db",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['3a27b0a5-8dbb-4fee-9eba-8b6bc2c252be',\n",
" 'eeb9fd9b-a3ac-4d60-a55b-a63a25d3b907',\n",
" '7ebbdae7-1061-445f-a046-1989f2343d8f',\n",
" 'c2ab315b-3cab-467f-b23a-b26ed186318d',\n",
" 'b83765f2-e5d2-471f-8c02-c3350ade4c4f']"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever.add_documents(docs)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "6e030694",
"metadata": {},
"source": [
"Do a hybrid search:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "bf7dbb98",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='In her second book, Dr. Simmons delves deeper into the ethical considerations surrounding AI development and deployment. It is an eye-opening examination of the dilemmas faced by developers, policymakers, and society at large.', metadata={}),\n",
" Document(page_content='A comprehensive analysis of the evolution of artificial intelligence, from its inception to its future prospects. Dr. Simmons covers ethical considerations, potentials, and threats posed by AI.', metadata={}),\n",
" Document(page_content=\"In his follow-up to 'Symbiosis', Prof. Sterling takes a look at the subtle, unnoticed presence and influence of AI in our everyday lives. It reveals how AI has become woven into our routines, often without our explicit realization.\", metadata={}),\n",
" Document(page_content='Prof. Sterling explores the potential for harmonious coexistence between humans and artificial intelligence. The book discusses how AI can be integrated into society in a beneficial and non-disruptive manner.', metadata={})]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever.invoke(\"the ethical implications of AI\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "d0c5bb4d",
"metadata": {},
"source": [
"Do a hybrid search with where filter:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "b2bc87c1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Prof. Sterling explores the potential for harmonious coexistence between humans and artificial intelligence. The book discusses how AI can be integrated into society in a beneficial and non-disruptive manner.', metadata={}),\n",
" Document(page_content=\"In his follow-up to 'Symbiosis', Prof. Sterling takes a look at the subtle, unnoticed presence and influence of AI in our everyday lives. It reveals how AI has become woven into our routines, often without our explicit realization.\", metadata={})]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever.invoke(\n",
" \"AI integration in society\",\n",
" where_filter={\n",
" \"path\": [\"author\"],\n",
" \"operator\": \"Equal\",\n",
" \"valueString\": \"Prof. Jonathan K. Sterling\",\n",
" },\n",
")"
]
},
{
"cell_type": "markdown",
"id": "5ae2899e",
"metadata": {},
"source": [
"Do a hybrid search with scores:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4fffd0af",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Prof. Sterling explores the potential for harmonious coexistence between humans and artificial intelligence. The book discusses how AI can be integrated into society in a beneficial and non-disruptive manner.', metadata={'_additional': {'explainScore': '(bm25)\\n(hybrid) Document eeb9fd9b-a3ac-4d60-a55b-a63a25d3b907 contributed 0.00819672131147541 to the score\\n(hybrid) Document eeb9fd9b-a3ac-4d60-a55b-a63a25d3b907 contributed 0.00819672131147541 to the score', 'score': '0.016393442'}}),\n",
" Document(page_content=\"In his follow-up to 'Symbiosis', Prof. Sterling takes a look at the subtle, unnoticed presence and influence of AI in our everyday lives. It reveals how AI has become woven into our routines, often without our explicit realization.\", metadata={'_additional': {'explainScore': '(bm25)\\n(hybrid) Document b83765f2-e5d2-471f-8c02-c3350ade4c4f contributed 0.0078125 to the score\\n(hybrid) Document b83765f2-e5d2-471f-8c02-c3350ade4c4f contributed 0.008064516129032258 to the score', 'score': '0.015877016'}}),\n",
" Document(page_content='In her second book, Dr. Simmons delves deeper into the ethical considerations surrounding AI development and deployment. It is an eye-opening examination of the dilemmas faced by developers, policymakers, and society at large.', metadata={'_additional': {'explainScore': '(bm25)\\n(hybrid) Document 7ebbdae7-1061-445f-a046-1989f2343d8f contributed 0.008064516129032258 to the score\\n(hybrid) Document 7ebbdae7-1061-445f-a046-1989f2343d8f contributed 0.0078125 to the score', 'score': '0.015877016'}}),\n",
" Document(page_content='A comprehensive analysis of the evolution of artificial intelligence, from its inception to its future prospects. Dr. Simmons covers ethical considerations, potentials, and threats posed by AI.', metadata={'_additional': {'explainScore': '(vector) [-0.0071824766 -0.0006682752 0.001723625 -0.01897258 -0.0045127636 0.0024410256 -0.020503938 0.013768672 0.009520169 -0.037972264]... \\n(hybrid) Document 3a27b0a5-8dbb-4fee-9eba-8b6bc2c252be contributed 0.007936507936507936 to the score', 'score': '0.007936508'}})]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever.invoke(\n",
" \"AI integration in society\",\n",
" score=True,\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -327,7 +327,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "langchain_ibm",
"display_name": "langchain",
"language": "python",
"name": "python3"
},

View File

@@ -1,201 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "e8712110",
"metadata": {},
"source": [
"## Overview\n",
"\n",
"Model2Vec is a technique to turn any sentence transformer into a really small static model\n",
"[model2vec](https://github.com/MinishLab/model2vec) can be used to generate embeddings."
]
},
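The distillation step itself is brief. Below is a minimal sketch, assuming the `distill` helper from the model2vec package (installed via `pip install model2vec[distill]`); the source model name and PCA dimensionality are illustrative choices, not recommendations:

```python
from model2vec.distill import distill

# Distill a sentence transformer into a small static embedding model.
# Model name and pca_dims are illustrative assumptions for this sketch.
static_model = distill(model_name="BAAI/bge-base-en-v1.5", pca_dims=256)

# The static model embeds text without running the original transformer.
vectors = static_model.encode(["Static models trade some accuracy for speed."])
print(vectors.shape)
```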
{
"cell_type": "markdown",
"id": "266dd424",
"metadata": {},
"source": [
"## Setup\n",
"\n",
"```bash\n",
"pip install -U langchain-community\n",
"```\n"
]
},
{
"cell_type": "markdown",
"id": "78ab91a6",
"metadata": {},
"source": [
"## Instantiation"
]
},
{
"cell_type": "markdown",
"id": "d06e7719",
"metadata": {},
"source": [
"Ensure that `model2vec` is installed\n",
"\n",
"```bash\n",
"pip install -U model2vec\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "f8ea1ed5",
"metadata": {},
"source": [
"## Indexing and Retrieval"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d25dc22d-b656-46c6-a42d-eace958590cd",
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-24T15:13:17.176956Z",
"start_time": "2023-05-24T15:13:15.399076Z"
},
"execution": {
"iopub.execute_input": "2024-03-29T15:39:19.252281Z",
"iopub.status.busy": "2024-03-29T15:39:19.252101Z",
"iopub.status.idle": "2024-03-29T15:39:19.339106Z",
"shell.execute_reply": "2024-03-29T15:39:19.338614Z",
"shell.execute_reply.started": "2024-03-29T15:39:19.252260Z"
}
},
"outputs": [],
"source": [
"from langchain_community.embeddings import Model2vecEmbeddings"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "8397b91f-a1f9-4be6-a699-fedaada7c37a",
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-24T15:13:17.193751Z",
"start_time": "2023-05-24T15:13:17.182053Z"
},
"execution": {
"iopub.execute_input": "2024-03-29T15:39:19.901573Z",
"iopub.status.busy": "2024-03-29T15:39:19.900935Z",
"iopub.status.idle": "2024-03-29T15:39:19.906540Z",
"shell.execute_reply": "2024-03-29T15:39:19.905345Z",
"shell.execute_reply.started": "2024-03-29T15:39:19.901529Z"
}
},
"outputs": [],
"source": [
"embeddings = Model2vecEmbeddings(\"minishlab/potion-base-8M\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "abcf98b7-424c-4691-a1cd-862c3d53be11",
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-24T15:13:17.844903Z",
"start_time": "2023-05-24T15:13:17.198751Z"
},
"execution": {
"iopub.execute_input": "2024-03-29T15:39:20.434581Z",
"iopub.status.busy": "2024-03-29T15:39:20.433117Z",
"iopub.status.idle": "2024-03-29T15:39:22.178650Z",
"shell.execute_reply": "2024-03-29T15:39:22.176058Z",
"shell.execute_reply.started": "2024-03-29T15:39:20.434501Z"
},
"scrolled": true
},
"outputs": [],
"source": [
"query_text = \"This is a test query.\"\n",
"query_result = embeddings.embed_query(query_text)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "98897454-b280-4ee1-bbb9-2c6c15342f87",
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-24T15:13:18.605339Z",
"start_time": "2023-05-24T15:13:17.845906Z"
},
"execution": {
"iopub.execute_input": "2024-03-29T15:39:28.164009Z",
"iopub.status.busy": "2024-03-29T15:39:28.161759Z",
"iopub.status.idle": "2024-03-29T15:39:30.217232Z",
"shell.execute_reply": "2024-03-29T15:39:30.215348Z",
"shell.execute_reply.started": "2024-03-29T15:39:28.163876Z"
},
"scrolled": true
},
"outputs": [],
"source": [
"document_text = \"This is a test document.\"\n",
"document_result = embeddings.embed_documents([document_text])"
]
},
{
"cell_type": "markdown",
"id": "11bac134",
"metadata": {},
"source": [
"## Direct Usage\n",
"\n",
"Here's how you would directly make use of `model2vec`\n",
"\n",
"```python\n",
"from model2vec import StaticModel\n",
"\n",
"# Load a model from the HuggingFace hub (in this case the potion-base-8M model)\n",
"model = StaticModel.from_pretrained(\"minishlab/potion-base-8M\")\n",
"\n",
"# Make embeddings\n",
"embeddings = model.encode([\"It's dangerous to go alone!\", \"It's a secret to everybody.\"])\n",
"\n",
"# Make sequences of token embeddings\n",
"token_embeddings = model.encode_as_sequence([\"It's dangerous to go alone!\", \"It's a secret to everybody.\"])\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "d81e21aa",
"metadata": {},
"source": [
"## API Reference\n",
"\n",
"For more information check out the model2vec github [repo](https://github.com/MinishLab/model2vec)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

View File

@@ -1,380 +0,0 @@
{
"cells": [
{
"cell_type": "raw",
"id": "10238e62-3465-4973-9279-606cbb7ccf16",
"metadata": {},
"source": [
"---\n",
"sidebar_label: ScrapeGraph\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "a6f91f20",
"metadata": {},
"source": [
"# ScrapeGraph\n",
"\n",
"This notebook provides a quick overview for getting started with ScrapeGraph [tools](/docs/integrations/tools/). For detailed documentation of all ScrapeGraph features and configurations head to the [API reference](https://python.langchain.com/docs/integrations/tools/scrapegraph).\n",
"\n",
"For more information about ScrapeGraph AI:\n",
"- [ScrapeGraph AI Website](https://scrapegraphai.com)\n",
"- [Open Source Project](https://github.com/ScrapeGraphAI/Scrapegraph-ai)\n",
"\n",
"## Overview\n",
"\n",
"### Integration details\n",
"\n",
"| Class | Package | Serializable | JS support | Package latest |\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [SmartScraperTool](https://python.langchain.com/docs/integrations/tools/scrapegraph) | langchain-scrapegraph | ✅ | ❌ | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-scrapegraph?style=flat-square&label=%20) |\n",
"| [MarkdownifyTool](https://python.langchain.com/docs/integrations/tools/scrapegraph) | langchain-scrapegraph | ✅ | ❌ | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-scrapegraph?style=flat-square&label=%20) |\n",
"| [LocalScraperTool](https://python.langchain.com/docs/integrations/tools/scrapegraph) | langchain-scrapegraph | ✅ | ❌ | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-scrapegraph?style=flat-square&label=%20) |\n",
"| [GetCreditsTool](https://python.langchain.com/docs/integrations/tools/scrapegraph) | langchain-scrapegraph | ✅ | ❌ | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-scrapegraph?style=flat-square&label=%20) |\n",
"\n",
"### Tool features\n",
"\n",
"| Tool | Purpose | Input | Output |\n",
"| :--- | :--- | :--- | :--- |\n",
"| SmartScraperTool | Extract structured data from websites | URL + prompt | JSON |\n",
"| MarkdownifyTool | Convert webpages to markdown | URL | Markdown text |\n",
"| LocalScraperTool | Extract data from HTML content | HTML + prompt | JSON |\n",
"| GetCreditsTool | Check API credits | None | Credit info |\n",
"\n",
"\n",
"## Setup\n",
"\n",
"The integration requires the following packages:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f85b4089",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --quiet -U langchain-scrapegraph"
]
},
{
"cell_type": "markdown",
"id": "b15e9266",
"metadata": {},
"source": [
"### Credentials\n",
"\n",
"You'll need a ScrapeGraph AI API key to use these tools. Get one at [scrapegraphai.com](https://scrapegraphai.com)."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e0b178a2",
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"if not os.environ.get(\"SGAI_API_KEY\"):\n",
" os.environ[\"SGAI_API_KEY\"] = getpass.getpass(\"ScrapeGraph AI API key:\\n\")"
]
},
{
"cell_type": "markdown",
"id": "bc5ab717",
"metadata": {},
"source": [
"It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a6c2f136",
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
]
},
{
"cell_type": "markdown",
"id": "1c97218f",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"Here we show how to instantiate instances of the ScrapeGraph tools:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8b3ddfe9",
"metadata": {},
"outputs": [],
"source": [
"from langchain_scrapegraph.tools import (\n",
" GetCreditsTool,\n",
" LocalScraperTool,\n",
" MarkdownifyTool,\n",
" SmartScraperTool,\n",
")\n",
"\n",
"smartscraper = SmartScraperTool()\n",
"markdownify = MarkdownifyTool()\n",
"localscraper = LocalScraperTool()\n",
"credits = GetCreditsTool()"
]
},
{
"cell_type": "markdown",
"id": "74147a1a",
"metadata": {},
"source": [
"## Invocation\n",
"\n",
"### [Invoke directly with args](/docs/concepts/tools)\n",
"\n",
"Let's try each tool individually:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "65310a8b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SmartScraper Result: {'company_name': 'ScrapeGraphAI', 'description': \"ScrapeGraphAI is a powerful AI web scraping tool that turns entire websites into clean, structured data through a simple API. It's designed to help developers and AI companies extract valuable data from websites efficiently and transform it into formats that are ready for use in LLM applications and data analysis.\"}\n",
"\n",
"Markdownify Result (first 200 chars): [![ScrapeGraphAI Logo](https://scrapegraphai.com/images/scrapegraphai_logo.svg)ScrapeGraphAI](https://scrapegraphai.com/)\n",
"\n",
"PartnersPricingFAQ[Blog](https://scrapegraphai.com/blog)DocsLog inSign up\n",
"\n",
"Op\n",
"LocalScraper Result: {'company_name': 'Company Name', 'description': 'We are a technology company focused on AI solutions.', 'contact': {'email': 'contact@example.com', 'phone': '(555) 123-4567'}}\n",
"\n",
"Credits Info: {'remaining_credits': 49679, 'total_credits_used': 914}\n"
]
}
],
"source": [
"# SmartScraper\n",
"result = smartscraper.invoke(\n",
" {\n",
" \"user_prompt\": \"Extract the company name and description\",\n",
" \"website_url\": \"https://scrapegraphai.com\",\n",
" }\n",
")\n",
"print(\"SmartScraper Result:\", result)\n",
"\n",
"# Markdownify\n",
"markdown = markdownify.invoke({\"website_url\": \"https://scrapegraphai.com\"})\n",
"print(\"\\nMarkdownify Result (first 200 chars):\", markdown[:200])\n",
"\n",
"local_html = \"\"\"\n",
"<html>\n",
" <body>\n",
" <h1>Company Name</h1>\n",
" <p>We are a technology company focused on AI solutions.</p>\n",
" <div class=\"contact\">\n",
" <p>Email: contact@example.com</p>\n",
" <p>Phone: (555) 123-4567</p>\n",
" </div>\n",
" </body>\n",
"</html>\n",
"\"\"\"\n",
"\n",
"# LocalScraper\n",
"result_local = localscraper.invoke(\n",
" {\n",
" \"user_prompt\": \"Make a summary of the webpage and extract the email and phone number\",\n",
" \"website_html\": local_html,\n",
" }\n",
")\n",
"print(\"LocalScraper Result:\", result_local)\n",
"\n",
"# Check credits\n",
"credits_info = credits.invoke({})\n",
"print(\"\\nCredits Info:\", credits_info)"
]
},
{
"cell_type": "markdown",
"id": "d6e73897",
"metadata": {},
"source": [
"### [Invoke with ToolCall](/docs/concepts/tools)\n",
"\n",
"We can also invoke the tool with a model-generated ToolCall:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f90e33a7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ToolMessage(content='{\"main_heading\": \"Get the data you need from any website\", \"description\": \"Easily extract and gather information with just a few lines of code with a simple api. Turn websites into clean and usable structured data.\"}', name='SmartScraper', tool_call_id='1')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model_generated_tool_call = {\n",
" \"args\": {\n",
" \"user_prompt\": \"Extract the main heading and description\",\n",
" \"website_url\": \"https://scrapegraphai.com\",\n",
" },\n",
" \"id\": \"1\",\n",
" \"name\": smartscraper.name,\n",
" \"type\": \"tool_call\",\n",
"}\n",
"smartscraper.invoke(model_generated_tool_call)"
]
},
{
"cell_type": "markdown",
"id": "659f9fbd",
"metadata": {},
"source": [
"## Chaining\n",
"\n",
"Let's use our tools with an LLM to analyze a website:\n",
"\n",
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
"\n",
"<ChatModelTabs customVarName=\"llm\" />"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "af3123ad",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# | output: false\n",
"# | echo: false\n",
"\n",
"# %pip install -qU langchain langchain-openai\n",
"from langchain.chat_models import init_chat_model\n",
"\n",
"llm = init_chat_model(model=\"gpt-4o\", model_provider=\"openai\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "fdbf35b5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='ScrapeGraph AI is an AI-powered web scraping tool that efficiently extracts and converts website data into structured formats via a simple API. It caters to developers, data scientists, and AI researchers, offering features like easy integration, support for dynamic content, and scalability for large projects. It supports various website types, including business, e-commerce, and educational sites. Contact: contact@scrapegraphai.com.', additional_kwargs={'tool_calls': [{'id': 'call_shkRPyjyAtfjH9ffG5rSy9xj', 'function': {'arguments': '{\"user_prompt\":\"Extract details about the products, services, and key features offered by ScrapeGraph AI, as well as any unique selling points or innovations mentioned on the website.\",\"website_url\":\"https://scrapegraphai.com\"}', 'name': 'SmartScraper'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 47, 'prompt_tokens': 480, 'total_tokens': 527, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_c7ca0ebaca', 'finish_reason': 'stop', 'logprobs': None}, id='run-45a12c86-d499-4273-8c59-0db926799bc7-0', tool_calls=[{'name': 'SmartScraper', 'args': {'user_prompt': 'Extract details about the products, services, and key features offered by ScrapeGraph AI, as well as any unique selling points or innovations mentioned on the website.', 'website_url': 'https://scrapegraphai.com'}, 'id': 'call_shkRPyjyAtfjH9ffG5rSy9xj', 'type': 'tool_call'}], usage_metadata={'input_tokens': 480, 'output_tokens': 47, 'total_tokens': 527, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.runnables import RunnableConfig, chain\n",
"\n",
"prompt = ChatPromptTemplate(\n",
" [\n",
" (\n",
" \"system\",\n",
" \"You are a helpful assistant that can use tools to extract structured information from websites.\",\n",
" ),\n",
" (\"human\", \"{user_input}\"),\n",
" (\"placeholder\", \"{messages}\"),\n",
" ]\n",
")\n",
"\n",
"llm_with_tools = llm.bind_tools([smartscraper], tool_choice=smartscraper.name)\n",
"llm_chain = prompt | llm_with_tools\n",
"\n",
"\n",
"@chain\n",
"def tool_chain(user_input: str, config: RunnableConfig):\n",
" input_ = {\"user_input\": user_input}\n",
" ai_msg = llm_chain.invoke(input_, config=config)\n",
" tool_msgs = smartscraper.batch(ai_msg.tool_calls, config=config)\n",
" return llm_chain.invoke({**input_, \"messages\": [ai_msg, *tool_msgs]}, config=config)\n",
"\n",
"\n",
"tool_chain.invoke(\n",
" \"What does ScrapeGraph AI do? Extract this information from their website https://scrapegraphai.com\"\n",
")"
]
},
{
"cell_type": "markdown",
"id": "4ac8146c",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all ScrapeGraph features and configurations head to the Langchain API reference: https://python.langchain.com/docs/integrations/tools/scrapegraph\n",
"\n",
"Or to the official SDK repo: https://github.com/ScrapeGraphAI/langchain-scrapegraph"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,959 +0,0 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "3fe4f4a9-8810-428c-90cb-147ad8563025",
"language": "python"
},
"source": [
"# SQLServer "
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "f791e7da-9710-4f15-93f0-6ea61840a25f",
"language": "python"
},
"source": [
Azure SQL provides">
">Azure SQL provides a dedicated [Vector data type](https://learn.microsoft.com/sql/t-sql/data-types/vector-data-type?view=azuresqldb-current&viewFallbackFrom=sql-server-ver16&tabs=csharp-sample) that simplifies the creation, storage, and querying of vector embeddings directly within a relational database. This eliminates the need for separate vector databases and related integrations, increasing the security of your solutions while reducing the overall complexity.\n",
"\n",
"Azure SQL is a robust service that combines scalability, security, and high availability, providing all the benefits of a modern database solution. It leverages a sophisticated query optimizer and enterprise features to perform vector similarity searches alongside traditional SQL queries, enhancing data analysis and decision-making. \n",
" \n",
"Read more on using [Intelligent applications with Azure SQL Database](https://learn.microsoft.com/azure/azure-sql/database/ai-artificial-intelligence-intelligent-applications?view=azuresql)\n",
"\n",
"This notebook shows you how to leverage this integrated SQL [vector database](https://devblogs.microsoft.com/azure-sql/exciting-announcement-public-preview-of-native-vector-support-in-azure-sql-database/) to store documents and perform vector search queries using Cosine (cosine distance), L2 (Euclidean distance), and IP (inner product) to locate documents close to the query vectors"
]
},
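For a sense of what this looks like at the SQL level, here is a hedged sketch of a raw cosine-distance query issued through `pyodbc`. The table and column names are hypothetical, the connection string is defined in the Initialization section below, and `VECTOR_DISTANCE`/`VECTOR` follow the Azure SQL documentation linked above; verify them against your server version before relying on this:

```python
import json

import pyodbc

# _CONNECTION_STRING is defined later in this notebook.
conn = pyodbc.connect(_CONNECTION_STRING)
cursor = conn.cursor()

query_embedding = json.dumps([0.1] * 1536)  # stand-in for a real embedding

# dbo.documents(content, embedding) is a hypothetical table for this sketch.
cursor.execute(
    """
    SELECT TOP 3 content,
           VECTOR_DISTANCE('cosine', embedding, CAST(? AS VECTOR(1536))) AS dist
    FROM dbo.documents
    ORDER BY dist
    """,
    query_embedding,
)
for row in cursor.fetchall():
    print(row.dist, row.content[:80])
```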
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "320f08b1-2fac-46fe-8e3a-273b6bf6ca8d",
"language": "python"
},
"source": [
"## Setup\n",
" \n",
"Install the `langchain-sqlserver` python package.\n",
"\n",
"The code lives in an integration package called:[langchain-sqlserver](https:\\github.com\\langchain-ai\\langchain-azure\\tree\\main\\libs\\sqlserver)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"azdata_cell_guid": "5fa6ff09-79d5-4023-9005-91a217f91a5b",
"language": "python"
},
"outputs": [],
"source": [
"!pip install langchain-sqlserver==0.1.1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Credentials\n",
"\n",
"There are no credentials needed to run this notebook, just make sure you downloaded the `langchain_sqlserver` package\n",
"If you want to get best in-class automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialization"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"azdata_cell_guid": "4113da9c-b0fe-4e01-bc06-cafe05634fb6",
"language": "python"
},
"outputs": [],
"source": [
"from langchain_sqlserver import SQLServer_VectorStore"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "458deaef-f985-4efe-957c-7840509fdfa3",
"language": "python"
},
"source": [
"Find your Azure SQL DB connection string in the Azure portal under your database settings\n",
"\n",
"For more info: [Connect to Azure SQL DB - Python](https:\\learn.microsoft.com\\en-us\\azure\\azure-sql\\database\\connect-query-python?view=azuresql)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"azdata_cell_guid": "d3439463-899e-48aa-88a1-ba6bdedbdc9d",
"language": "python"
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import pyodbc\n",
"\n",
"# Define your SQLServer Connection String\n",
"_CONNECTION_STRING = (\n",
" \"Driver={ODBC Driver 18 for SQL Server};\"\n",
" \"Server=<YOUR_DBSERVER>.database.windows.net,1433;\"\n",
" \"Database=test;\"\n",
" \"TrustServerCertificate=yes;\"\n",
" \"Connection Timeout=60;\"\n",
" \"LongAsMax=yes;\"\n",
")\n",
"\n",
"# Connection string can vary:\n",
"# \"mssql+pyodbc://<username>:<password><servername>/<dbname>?driver=ODBC+Driver+18+for+SQL+Server\" -> With Username and Password specified\n",
"# \"mssql+pyodbc://<servername>/<dbname>?driver=ODBC+Driver+18+for+SQL+Server&Trusted_connection=yes\" -> Uses Trusted connection\n",
"# \"mssql+pyodbc://<servername>/<dbname>?driver=ODBC+Driver+18+for+SQL+Server\" -> Uses EntraID connection\n",
"# \"mssql+pyodbc://<servername>/<dbname>?driver=ODBC+Driver+18+for+SQL+Server&Trusted_connection=no\" -> Uses EntraID connection"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "dcbdafc3-71ec-4e73-b768-ffb49dae2aee",
"language": "python"
},
"source": [
"In this example we use Azure OpenAI to generate embeddings , however you can use different embeddings provided in LangChain.\n",
"\n",
"You can deploy a version of Azure OpenAI instance on Azure Portal following this [guide](https:\\learn.microsoft.com\\en-us\\azure\\ai-services\\openai\\how-to\\create-resource?pivots=web-portal). Once you have your instance running, make sure you have the name of your instance and key. You can find the key in the Azure Portal, under the \"Keys and Endpoint\" section of your instance."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"azdata_cell_guid": "a65110ff-cfa4-498c-bb7a-d937c04872c0",
"language": "python"
},
"outputs": [],
"source": [
"!pip install langchain-openai"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"azdata_cell_guid": "3bd306b1-f346-4c01-93f4-039827e4f2e6",
"language": "python"
},
"outputs": [],
"source": [
"# Import the necessary Libraries\n",
"from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings\n",
"\n",
"# Set your AzureOpenAI details\n",
"azure_endpoint = \"https://<YOUR_ENDPOINT>.openai.azure.com/\"\n",
"azure_deployment_name_embedding = \"text-embedding-3-small\"\n",
"azure_deployment_name_chatcompletion = \"chatcompletion\"\n",
"azure_api_version = \"2023-05-15\"\n",
"azure_api_key = \"YOUR_KEY\"\n",
"\n",
"\n",
"# Use AzureChatOpenAI for chat completions\n",
"llm = AzureChatOpenAI(\n",
" azure_endpoint=azure_endpoint,\n",
" azure_deployment=azure_deployment_name_chatcompletion,\n",
" openai_api_version=azure_api_version,\n",
" openai_api_key=azure_api_key,\n",
")\n",
"\n",
"# Use AzureOpenAIEmbeddings for embeddings\n",
"embeddings = AzureOpenAIEmbeddings(\n",
" azure_endpoint=azure_endpoint,\n",
" azure_deployment=azure_deployment_name_embedding,\n",
" openai_api_version=azure_api_version,\n",
" openai_api_key=azure_api_key,\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "f1f10145-06db-4cab-853f-9eb3b6fa8ada",
"language": "python"
},
"source": [
"## Manage vector store  "
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"azdata_cell_guid": "c4033f67-bea2-4859-af4d-b41f3b929978",
"language": "python"
},
"outputs": [],
"source": [
"from langchain_community.vectorstores.utils import DistanceStrategy\n",
"from langchain_sqlserver import SQLServer_VectorStore\n",
"\n",
"# Initialize the vector store\n",
"vector_store = SQLServer_VectorStore(\n",
" connection_string=_CONNECTION_STRING,\n",
" distance_strategy=DistanceStrategy.COSINE, # optional, if not provided, defaults to COSINE\n",
" embedding_function=embeddings, # you can use different embeddings provided in LangChain\n",
" embedding_length=1536,\n",
" table_name=\"langchain_test_table\", # using table with a custom name\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "525f611b-2bd5-4fd4-9192-93d588c5ad0b",
"language": "python"
},
"source": [
"### Add items to vector store"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"azdata_cell_guid": "6410813d-0ff1-44dd-b6bb-32fd74772e4f",
"language": "python"
},
"outputs": [],
"source": [
"## we will use some artificial data for this example\n",
"query = [\n",
" \"I have bought several of the Vitality canned dog food products and have found them all to be of good quality. The product looks more like a stew than a processed meat and it smells better. My Labrador is finicky and she appreciates this product better than most.\",\n",
" \"The candy is just red , No flavor . Just plan and chewy . I would never buy them again\",\n",
" \"Arrived in 6 days and were so stale i could not eat any of the 6 bags!!\",\n",
" \"Got these on sale for roughly 25 cents per cup, which is half the price of my local grocery stores, plus they rarely stock the spicy flavors. These things are a GREAT snack for my office where time is constantly crunched and sometimes you can't escape for a real meal. This is one of my favorite flavors of Instant Lunch and will be back to buy every time it goes on sale.\",\n",
" \"If you are looking for a less messy version of licorice for the children, then be sure to try these! They're soft, easy to chew, and they don't get your hands all sticky and gross in the car, in the summer, at the beach, etc. We love all the flavos and sometimes mix these in with the chocolate to have a very nice snack! Great item, great price too, highly recommend!\",\n",
" \"We had trouble finding this locally - delivery was fast, no more hunting up and down the flour aisle at our local grocery stores.\",\n",
" \"Too much of a good thing? We worked this kibble in over time, slowly shifting the percentage of Felidae to national junk-food brand until the bowl was all natural. By this time, the cats couldn't keep it in or down. What a mess. We've moved on.\",\n",
" \"Hey, the description says 360 grams - that is roughly 13 ounces at under $4.00 per can. No way - that is the approximate price for a 100 gram can.\",\n",
" \"The taste of these white cheddar flat breads is like a regular cracker - which is not bad, except that I bought them because I wanted a cheese taste.<br /><br />What was a HUGE disappointment? How misleading the packaging of the box is. The photo on the box (I bought these in store) makes it look like it is full of long flatbreads (expanding the length and width of the box). Wrong! The plastic tray that holds the crackers is about 2\"\n",
" \" smaller all around - leaving you with about 15 or so small flatbreads.<br /><br />What is also bad about this is that the company states they use biodegradable and eco-friendly packaging. FAIL! They used a HUGE box for a ridiculously small amount of crackers. Not ecofriendly at all.<br /><br />Would I buy these again? No - I feel ripped off. The other crackers (like Sesame Tarragon) give you a little<br />more bang for your buck and have more flavor.\",\n",
" \"I have used this product in smoothies for my son and he loves it. Additionally, I use this oil in the shower as a skin conditioner and it has made my skin look great. Some of the stretch marks on my belly has disappeared quickly. Highly recommend!!!\",\n",
" \"Been taking Coconut Oil for YEARS. This is the best on the retail market. I wish it was in glass, but this is the one.\",\n",
"]\n",
"\n",
"query_metadata = [\n",
" {\"id\": 1, \"summary\": \"Good Quality Dog Food\"},\n",
" {\"id\": 8, \"summary\": \"Nasty No flavor\"},\n",
" {\"id\": 4, \"summary\": \"stale product\"},\n",
" {\"id\": 11, \"summary\": \"Great value and convenient ramen\"},\n",
" {\"id\": 5, \"summary\": \"Great for the kids!\"},\n",
" {\"id\": 2, \"summary\": \"yum falafel\"},\n",
" {\"id\": 9, \"summary\": \"Nearly killed the cats\"},\n",
" {\"id\": 6, \"summary\": \"Price cannot be correct\"},\n",
" {\"id\": 3, \"summary\": \"Taste is neutral, quantity is DECEITFUL!\"},\n",
" {\"id\": 7, \"summary\": \"This stuff is great\"},\n",
" {\"id\": 10, \"summary\": \"The reviews don't lie\"},\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"azdata_cell_guid": "03e8161a-6cdd-415d-8261-b6b99982726c",
"language": "python"
},
"outputs": [
{
"data": {
"text/plain": [
"[1, 8, 4, 11, 5, 2, 9, 6, 3, 7, 10]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vector_store.add_texts(texts=query, metadatas=query_metadata)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "a2838ad1-64a1-409e-b97d-7883b42a0b33",
"language": "python"
},
"source": [
"## Query vector store\n",
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent.\n",
"\n",
"Performing a simple similarity search can be done as follows:"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"azdata_cell_guid": "1baa2857-167e-4873-ad9c-e67649ef39bf",
"language": "python"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(metadata={'id': 1, 'summary': 'Good Quality Dog Food'}, page_content='I have bought several of the Vitality canned dog food products and have found them all to be of good quality. The product looks more like a stew than a processed meat and it smells better. My Labrador is finicky and she appreciates this product better than most.'), Document(metadata={'id': 7, 'summary': 'This stuff is great'}, page_content='I have used this product in smoothies for my son and he loves it. Additionally, I use this oil in the shower as a skin conditioner and it has made my skin look great. Some of the stretch marks on my belly has disappeared quickly. Highly recommend!!!'), Document(metadata={'id': 5, 'summary': 'Great for the kids!'}, page_content=\"If you are looking for a less messy version of licorice for the children, then be sure to try these! They're soft, easy to chew, and they don't get your hands all sticky and gross in the car, in the summer, at the beach, etc. We love all the flavos and sometimes mix these in with the chocolate to have a very nice snack! Great item, great price too, highly recommend!\")]\n"
]
}
],
"source": [
"# Perform a similarity search between the embedding of the query and the embeddings of the documents\n",
"simsearch_result = vector_store.similarity_search(\"Good reviews\", k=3)\n",
"print(simsearch_result)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "f92f0a1b-19aa-46d1-ad1a-c2e52f9114d0",
"language": "python"
},
"source": [
"### Filtering Support:\n",
"\n",
"The vectorstore supports a set of filters that can be applied against the metadata fields of the documents.This feature enables developers and data analysts to refine their queries, ensuring that the search results are accurately aligned with their needs. By applying filters based on specific metadata attributes, users can limit the scope of their searches, concentrating only on the most relevant data subsets."
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"azdata_cell_guid": "24fabd60-0b29-4ed9-9d5e-38c68fe05dfa",
"language": "python"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(metadata={'id': 7, 'summary': 'This stuff is great'}, page_content='I have used this product in smoothies for my son and he loves it. Additionally, I use this oil in the shower as a skin conditioner and it has made my skin look great. Some of the stretch marks on my belly has disappeared quickly. Highly recommend!!!'), Document(metadata={'id': 5, 'summary': 'Great for the kids!'}, page_content=\"If you are looking for a less messy version of licorice for the children, then be sure to try these! They're soft, easy to chew, and they don't get your hands all sticky and gross in the car, in the summer, at the beach, etc. We love all the flavos and sometimes mix these in with the chocolate to have a very nice snack! Great item, great price too, highly recommend!\"), Document(metadata={'id': 3, 'summary': 'Taste is neutral, quantity is DECEITFUL!'}, page_content='The taste of these white cheddar flat breads is like a regular cracker - which is not bad, except that I bought them because I wanted a cheese taste.<br /><br />What was a HUGE disappointment? How misleading the packaging of the box is. The photo on the box (I bought these in store) makes it look like it is full of long flatbreads (expanding the length and width of the box). Wrong! The plastic tray that holds the crackers is about 2 smaller all around - leaving you with about 15 or so small flatbreads.<br /><br />What is also bad about this is that the company states they use biodegradable and eco-friendly packaging. FAIL! They used a HUGE box for a ridiculously small amount of crackers. Not ecofriendly at all.<br /><br />Would I buy these again? No - I feel ripped off. The other crackers (like Sesame Tarragon) give you a little<br />more bang for your buck and have more flavor.')]\n"
]
}
],
"source": [
"# hybrid search -> filter for cases where id not equal to 1.\n",
"hybrid_simsearch_result = vector_store.similarity_search(\n",
" \"Good reviews\", k=3, filter={\"id\": {\"$ne\": 1}}\n",
")\n",
"print(hybrid_simsearch_result)"
]
},
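{
"cell_type": "markdown",
"metadata": {},
"source": [
"Filters can also be combined. The sketch below is an assumption based on the common LangChain filter dialect (operators such as `$and`, `$in`, and `$gte`); check the API reference for the exact operators your installed version supports."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A sketch of a compound filter, assuming the common LangChain operator\n",
"# dialect ($and, $in, $gte, ...) is supported by this store.\n",
"compound_simsearch_result = vector_store.similarity_search(\n",
"    \"Good reviews\",\n",
"    k=3,\n",
"    filter={\"$and\": [{\"id\": {\"$gte\": 2}}, {\"id\": {\"$in\": [2, 5, 7, 10]}}]},\n",
")\n",
"print(compound_simsearch_result)"
]
},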
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "449c4cde-e303-4856-8deb-6e6ad56f9501",
"language": "python"
},
"source": [
"### Similarity Search with Score:\n",
"If you want to execute a similarity search and receive the corresponding scores you can run:"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"azdata_cell_guid": "382fa5d4-6da1-46c1-987f-6d0ec050be99",
"language": "python",
"tags": [
"hide_input"
]
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(Document(metadata={'id': 3, 'summary': 'Taste is neutral, quantity is DECEITFUL!'}, page_content='The taste of these white cheddar flat breads is like a regular cracker - which is not bad, except that I bought them because I wanted a cheese taste.<br /><br />What was a HUGE disappointment? How misleading the packaging of the box is. The photo on the box (I bought these in store) makes it look like it is full of long flatbreads (expanding the length and width of the box). Wrong! The plastic tray that holds the crackers is about 2 smaller all around - leaving you with about 15 or so small flatbreads.<br /><br />What is also bad about this is that the company states they use biodegradable and eco-friendly packaging. FAIL! They used a HUGE box for a ridiculously small amount of crackers. Not ecofriendly at all.<br /><br />Would I buy these again? No - I feel ripped off. The other crackers (like Sesame Tarragon) give you a little<br />more bang for your buck and have more flavor.'), 0.651870006770711), (Document(metadata={'id': 8, 'summary': 'Nasty No flavor'}, page_content='The candy is just red , No flavor . Just plan and chewy . I would never buy them again'), 0.6908952973052638), (Document(metadata={'id': 4, 'summary': 'stale product'}, page_content='Arrived in 6 days and were so stale i could not eat any of the 6 bags!!'), 0.7360955776468822), (Document(metadata={'id': 1, 'summary': 'Good Quality Dog Food'}, page_content='I have bought several of the Vitality canned dog food products and have found them all to be of good quality. The product looks more like a stew than a processed meat and it smells better. My Labrador is finicky and she appreciates this product better than most.'), 0.7408823529514486), (Document(metadata={'id': 9, 'summary': 'Nearly killed the cats'}, page_content=\"Too much of a good thing? We worked this kibble in over time, slowly shifting the percentage of Felidae to national junk-food brand until the bowl was all natural. By this time, the cats couldn't keep it in or down. What a mess. We've moved on.\"), 0.782995248991772), (Document(metadata={'id': 7, 'summary': 'This stuff is great'}, page_content='I have used this product in smoothies for my son and he loves it. Additionally, I use this oil in the shower as a skin conditioner and it has made my skin look great. Some of the stretch marks on my belly has disappeared quickly. Highly recommend!!!'), 0.7912681479906212), (Document(metadata={'id': 2, 'summary': 'yum falafel'}, page_content='We had trouble finding this locally - delivery was fast, no more hunting up and down the flour aisle at our local grocery stores.'), 0.809213468778896), (Document(metadata={'id': 10, 'summary': \"The reviews don't lie\"}, page_content='Been taking Coconut Oil for YEARS. This is the best on the retail market. I wish it was in glass, but this is the one.'), 0.8281482301097155), (Document(metadata={'id': 5, 'summary': 'Great for the kids!'}, page_content=\"If you are looking for a less messy version of licorice for the children, then be sure to try these! They're soft, easy to chew, and they don't get your hands all sticky and gross in the car, in the summer, at the beach, etc. We love all the flavos and sometimes mix these in with the chocolate to have a very nice snack! Great item, great price too, highly recommend!\"), 0.8283754326400574), (Document(metadata={'id': 6, 'summary': 'Price cannot be correct'}, page_content='Hey, the description says 360 grams - that is roughly 13 ounces at under $4.00 per can. 
No way - that is the approximate price for a 100 gram can.'), 0.8323967822635847), (Document(metadata={'id': 11, 'summary': 'Great value and convenient ramen'}, page_content=\"Got these on sale for roughly 25 cents per cup, which is half the price of my local grocery stores, plus they rarely stock the spicy flavors. These things are a GREAT snack for my office where time is constantly crunched and sometimes you can't escape for a real meal. This is one of my favorite flavors of Instant Lunch and will be back to buy every time it goes on sale.\"), 0.8387189489406939)]\n"
]
}
],
"source": [
"simsearch_with_score_result = vector_store.similarity_search_with_score(\n",
" \"Not a very good product\", k=12\n",
")\n",
"print(simsearch_with_score_result)"
]
},
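{
"cell_type": "markdown",
"metadata": {},
"source": [
"The scores returned above are cosine distances, so lower values indicate closer matches. If you prefer normalized relevance scores where higher is better, the base vector store interface exposes `similarity_search_with_relevance_scores`. The sketch below assumes this store supports the default relevance conversion; if it raises `NotImplementedError`, fall back to the raw distances above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch: relies on the base VectorStore relevance conversion,\n",
"# which maps distances into [0, 1] where higher means more relevant.\n",
"relevance_results = vector_store.similarity_search_with_relevance_scores(\n",
"    \"Not a very good product\", k=3\n",
")\n",
"for doc, relevance in relevance_results:\n",
"    print(f\"{relevance:.3f}  {doc.metadata['summary']}\")"
]
},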
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "620e29bd-02f8-4dc7-91a2-52537cb08886",
"language": "python"
},
"source": [
"For a full list of the different searches you can execute on a Azure SQL vector store, please refer to the [API reference](https://python.langchain.com/api_reference/sqlserver/index.html)."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "ff48b371-b94f-4a3a-bd66-cce856baf6c4",
"language": "python"
},
"source": [
"### Similarity Search when you already have embeddings you want to search on"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"azdata_cell_guid": "35afb4cd-0682-4525-9ba8-625fecc59bb4",
"language": "python",
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(metadata={'id': 8, 'summary': 'Nasty No flavor'}, page_content='The candy is just red , No flavor . Just plan and chewy . I would never buy them again'), Document(metadata={'id': 4, 'summary': 'stale product'}, page_content='Arrived in 6 days and were so stale i could not eat any of the 6 bags!!'), Document(metadata={'id': 3, 'summary': 'Taste is neutral, quantity is DECEITFUL!'}, page_content='The taste of these white cheddar flat breads is like a regular cracker - which is not bad, except that I bought them because I wanted a cheese taste.<br /><br />What was a HUGE disappointment? How misleading the packaging of the box is. The photo on the box (I bought these in store) makes it look like it is full of long flatbreads (expanding the length and width of the box). Wrong! The plastic tray that holds the crackers is about 2 smaller all around - leaving you with about 15 or so small flatbreads.<br /><br />What is also bad about this is that the company states they use biodegradable and eco-friendly packaging. FAIL! They used a HUGE box for a ridiculously small amount of crackers. Not ecofriendly at all.<br /><br />Would I buy these again? No - I feel ripped off. The other crackers (like Sesame Tarragon) give you a little<br />more bang for your buck and have more flavor.'), Document(metadata={'id': 6, 'summary': 'Price cannot be correct'}, page_content='Hey, the description says 360 grams - that is roughly 13 ounces at under $4.00 per can. No way - that is the approximate price for a 100 gram can.')]\n"
]
}
],
"source": [
"# if you already have embeddings you want to search on\n",
"simsearch_by_vector = vector_store.similarity_search_by_vector(\n",
" [-0.0033353185281157494, -0.017689190804958344, -0.01590404286980629, ...]\n",
")\n",
"print(simsearch_by_vector)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"azdata_cell_guid": "8a7083fd-ddb2-4187-a315-744b7a623178",
"language": "python"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(Document(metadata={'id': 8, 'summary': 'Nasty No flavor'}, page_content='The candy is just red , No flavor . Just plan and chewy . I would never buy them again'), 0.9648153551769503), (Document(metadata={'id': 4, 'summary': 'stale product'}, page_content='Arrived in 6 days and were so stale i could not eat any of the 6 bags!!'), 0.9655108580341948), (Document(metadata={'id': 3, 'summary': 'Taste is neutral, quantity is DECEITFUL!'}, page_content='The taste of these white cheddar flat breads is like a regular cracker - which is not bad, except that I bought them because I wanted a cheese taste.<br /><br />What was a HUGE disappointment? How misleading the packaging of the box is. The photo on the box (I bought these in store) makes it look like it is full of long flatbreads (expanding the length and width of the box). Wrong! The plastic tray that holds the crackers is about 2 smaller all around - leaving you with about 15 or so small flatbreads.<br /><br />What is also bad about this is that the company states they use biodegradable and eco-friendly packaging. FAIL! They used a HUGE box for a ridiculously small amount of crackers. Not ecofriendly at all.<br /><br />Would I buy these again? No - I feel ripped off. The other crackers (like Sesame Tarragon) give you a little<br />more bang for your buck and have more flavor.'), 0.9840511208615808), (Document(metadata={'id': 6, 'summary': 'Price cannot be correct'}, page_content='Hey, the description says 360 grams - that is roughly 13 ounces at under $4.00 per can. No way - that is the approximate price for a 100 gram can.'), 0.9915737524649991)]\n"
]
}
],
"source": [
"# Similarity Search with Score if you already have embeddings you want to search on\n",
"simsearch_by_vector_with_score = vector_store.similarity_search_by_vector_with_score(\n",
" [-0.0033353185281157494, -0.017689190804958344, -0.01590404286980629, ...]\n",
")\n",
"print(simsearch_by_vector_with_score)"
]
},
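{
"cell_type": "markdown",
"metadata": {},
"source": [
"In practice you can generate a full query embedding with the same embedding function used to build the store and search on that, as in this sketch (it assumes the `embeddings` object created earlier in this notebook is still in scope):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Embed the query with the same model used for the stored documents,\n",
"# then search directly on the resulting vector.\n",
"query_vector = embeddings.embed_query(\"Not a very good product\")\n",
"docs_by_vector = vector_store.similarity_search_by_vector(query_vector, k=4)\n",
"print(docs_by_vector)"
]
},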
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Delete items from vector store"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "01f30a69-76cb-4137-bb80-1061abc095be",
"language": "python"
},
"source": [
"### Delete Row by ID"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"azdata_cell_guid": "1b42828c-0850-4d89-a1b5-a463bae0f143",
"language": "python"
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# delete row by id\n",
"vector_store.delete([\"3\", \"7\"])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "51b9a47e-a17a-4427-8abe-90d87fd63389",
"language": "python"
},
"source": [
"### Drop Vector Store"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"azdata_cell_guid": "cc9a281a-d204-4830-83d0-fcdd890c7f9c",
"language": "python"
},
"outputs": [],
"source": [
"# drop vectorstore\n",
"vector_store.drop()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "2d1b942b-f1ca-4fb5-abb7-bb2855631962",
"language": "python"
},
"source": [
"## Load a Document from Azure Blob Storage"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "cab89a29-e5e3-44b6-8f29-b4470d26f5d4",
"language": "python"
},
"source": [
"Below is example of loading a file from Azure Blob Storage container into the SQL Vector store after splitting the document into chunks.\n",
"[Azure Blog Storage](https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction) is Microsoft's object storage solution for the cloud. Blob Storage is optimized for storing massive amounts of unstructured data. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"azdata_cell_guid": "6cff6a17-89b6-4d73-a92d-cf289dea4294",
"language": "python"
},
"outputs": [],
"source": [
"pip install azure-storage-blob"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"azdata_cell_guid": "d9127900-0942-48f1-bd4d-081c7fa3fcae",
"language": "python"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of split documents: 528\n"
]
}
],
"source": [
"from langchain.document_loaders import AzureBlobStorageFileLoader\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_core.documents import Document\n",
"\n",
"# Define your connection string and blob details\n",
"conn_str = \"DefaultEndpointsProtocol=https;AccountName=<YourBlobName>;AccountKey=<YourAccountKey>==;EndpointSuffix=core.windows.net\"\n",
"container_name = \"<YourContainerName\"\n",
"blob_name = \"01 Harry Potter and the Sorcerers Stone.txt\"\n",
"\n",
"# Create an instance of AzureBlobStorageFileLoader\n",
"loader = AzureBlobStorageFileLoader(\n",
" conn_str=conn_str, container=container_name, blob_name=blob_name\n",
")\n",
"\n",
"# Load the document from Azure Blob Storage\n",
"documents = loader.load()\n",
"\n",
"# Split the document into smaller chunks if necessary\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)\n",
"split_documents = text_splitter.split_documents(documents)\n",
"\n",
"# Print the number of split documents\n",
"print(f\"Number of split documents: {len(split_documents)}\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "28319d6e-8e09-4355-85d5-da5cadd589d9"
},
"source": [
"API Reference:[AzureBlobStorageContainerLoader](https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.azure_blob_storage_container.AzureBlobStorageContainerLoader.html)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"azdata_cell_guid": "d017cccc-9d8b-459c-a33d-06860360be1a",
"language": "python"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Documents added to the vector store successfully!\n"
]
}
],
"source": [
"# # Initialize the vector store & insert the documents in AzureSQLDB with their embeddings\n",
"vector_store = SQLServer_VectorStore(\n",
" connection_string=_CONNECTION_STRING,\n",
" distance_strategy=DistanceStrategy.COSINE,\n",
" embedding_function=embeddings,\n",
" embedding_length=1536,\n",
" table_name=\"harrypotter\",\n",
") # Replace with your actual vector store initialization\n",
"\n",
"# Add split documents to the vector store individually\n",
"for i, doc in enumerate(split_documents):\n",
" vector_store.add_documents(documents=[doc], ids=[f\"doc_{i}\"])\n",
"\n",
"print(\"Documents added to the vector store successfully!\")"
]
},
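{
"cell_type": "markdown",
"metadata": {},
"source": [
"Adding the chunks one at a time keeps each insert explicit, but it issues one call per chunk. As a sketch, the same result can usually be achieved with a single batched call; run it instead of (not in addition to) the loop above, otherwise the rows will be inserted twice."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Batched alternative to the per-document loop above, mirroring the\n",
"# same doc_{i} id scheme. Run this instead of the loop, not after it.\n",
"ids = [f\"doc_{i}\" for i in range(len(split_documents))]\n",
"vector_store.add_documents(documents=split_documents, ids=ids)"
]
},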
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Query directly"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"azdata_cell_guid": "fe5c369f-17b6-46f1-b946-081cb73d03c8",
"language": "python"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------------------------------------------------------\n",
"Score: 0.3626232679001803\n",
"The Dursleys had everything they wanted, but they also had a secret, and their greatest fear was that somebody would discover it. They didnt think they could bear it if anyone found out about the Potters. Mrs. Potter was Mrs. Dursleys sister, but they hadnt met for several years; in fact, Mrs. Dursley pretended she didnt have a sister, because her sister and her good-for-nothing husband were as unDursleyish as it was possible to be. The Dursleys shuddered to think what the neighbors would say if the Potters arrived in the street. The Dursleys knew that the Potters had a small son, too, but they had never even seen him. This boy was another good reason for keeping the Potters away; they didnt want Dudley mixing with a child like that.\n",
"------------------------------------------------------------\n",
"------------------------------------------------------------\n",
"Score: 0.44752797298657554\n",
"The Dursleys house had four bedrooms: one for Uncle Vernon and Aunt Petunia, one for visitors (usually Uncle Vernons sister, Marge), one where Dudley slept, and one where Dudley kept all the toys and things that wouldnt fit into his first bedroom. It only took Harry one trip upstairs to move everything he owned from the cupboard to this room. He sat down on the bed and stared around him. Nearly everything in here was broken. The month-old video camera was lying on top of a small, working tank Dudley had once driven over the next door neighbors dog; in the corner was Dudleys first-ever television set, which hed put his foot through when his favorite program had been canceled; there was a large birdcage, which had once held a parrot that Dudley had swapped at school for a real air rifle, which was up on a shelf with the end all bent because Dudley had sat on it. Other shelves were full of books. They were the only things in the room that looked as though theyd never been touched.\n",
"------------------------------------------------------------\n",
"------------------------------------------------------------\n",
"Score: 0.4652486419877385\n",
"M r. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much. They were the last people youd expect to be involved in anything strange or mysterious, because they just didnt hold with such nonsense.\n",
"\n",
"Mr. Dursley was the director of a firm called Grunnings, which made drills. He was a big, beefy man with hardly any neck, although he did have a very large mustache. Mrs. Dursley was thin and blonde and had nearly twice the usual amount of neck, which came in very useful as she spent so much of her time craning over garden fences, spying on the neighbors. The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere.\n",
"------------------------------------------------------------\n",
"------------------------------------------------------------\n",
"Score: 0.4739086301927252\n",
"Hagrid was watching him sadly.\n",
"\n",
"“Took yeh from the ruined house myself, on Dumbledores orders. Brought yeh ter this lot….”\n",
"\n",
"“Load of old tosh,” said Uncle Vernon. Harry jumped; he had almost forgotten that the Dursleys were there. Uncle Vernon certainly seemed to have got back his courage. He was glaring at Hagrid and his fists were clenched.\n",
"\n",
"“Now, you listen here, boy,” he snarled, “I accept theres something strange about you, probably nothing a good beating wouldnt have cured — and as for all this about your parents, well, they were weirdoes, no denying it, and the worlds better off without them in my opinion — asked for all they got, getting mixed up with these wizarding types — just what I expected, always knew theyd come to a sticky end —”\n",
"\n",
"But at that moment, Hagrid leapt from the sofa and drew a battered pink umbrella from inside his coat. Pointing this at Uncle Vernon like a sword, he said, “Im warning you, Dursley — Im warning you — one more word….”\n",
"------------------------------------------------------------\n"
]
}
],
"source": [
"from typing import List, Tuple\n",
"\n",
"# Perform similarity search\n",
"query = \"Why did the Dursleys not want Harry in their house?\"\n",
"docs_with_score: List[Tuple[Document, float]] = (\n",
" vector_store.similarity_search_with_score(query)\n",
")\n",
"\n",
"for doc, score in docs_with_score:\n",
" print(\"-\" * 60)\n",
" print(\"Score: \", score)\n",
" print(doc.page_content)\n",
" print(\"-\" * 60)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "13ca400b-7883-45af-9e17-75f2f622dde1",
"language": "python"
},
"source": [
"## Usage for retrieval-augmented generation\n",
"\n",
"#### Use Case 1: Q&A System based on the Story Book\n",
"\n",
"The Q&A function allows users to ask specific questions about the story, characters, and events, and get concise, context-rich answers. This not only enhances their understanding of the books but also makes them feel like they're part of the magical universe.\n",
"\n",
"## Query by turning into retriever\n",
"\n",
"The LangChain Vector store simplifies building sophisticated Q&A systems by enabling efficient similarity searches to find the top 10 relevant documents based on the user's query. The **retriever** is created from the **vector\\_store,** and the question-answer chain is built using the **create\\_stuff\\_documents\\_chain** function. A prompt template is crafted using the **ChatPromptTemplate** class, ensuring structured and context-rich responses. Often in Q&A applications it's important to show users the sources that were used to generate the answer. LangChain's built-in **create\\_retrieval\\_chain** will propagate retrieved source documents to the output under the \"context\" key:\n",
"\n",
"Read more about Langchain RAG tutorials & the terminologies mentioned above [here](https:/python.langchain.com/docs/tutorials/rag)"
]
},
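{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a minimal sketch before building the full chain, the vector store can be wrapped as a retriever directly. The `search_kwargs` value shown is an assumption; see the retriever how-to guides for the options your store supports."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal retriever sketch: wrap the vector store and fetch the top-k\n",
"# chunks for a question (k=4 matches the retriever default).\n",
"retriever = vector_store.as_retriever(search_kwargs={\"k\": 4})\n",
"retrieved_docs = retriever.invoke(\"Why did the Dursleys dislike anything unusual?\")\n",
"for doc in retrieved_docs:\n",
"    print(doc.page_content[:80], \"...\")"
]
},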
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"azdata_cell_guid": "b60a1f0a-a767-413a-a537-61103439a26e",
"language": "python"
},
"outputs": [],
"source": [
"from typing import List, Tuple\n",
"\n",
"import pandas as pd\n",
"from langchain.chains import create_retrieval_chain\n",
"from langchain.chains.combine_documents import create_stuff_documents_chain\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"\n",
"# Define the function to perform the RAG chain invocation\n",
"def get_answer_and_sources(user_query: str):\n",
" # Perform similarity search with scores\n",
" docs_with_score: List[Tuple[Document, float]] = (\n",
" vector_store.similarity_search_with_score(\n",
" user_query,\n",
" k=10,\n",
" )\n",
" )\n",
"\n",
" # Extract the context from the top results\n",
" context = \"\\n\".join([doc.page_content for doc, score in docs_with_score])\n",
"\n",
" # Define the system prompt\n",
" system_prompt = (\n",
" \"You are an assistant for question-answering tasks based on the story in the book. \"\n",
" \"Use the following pieces of retrieved context to answer the question. \"\n",
" \"If you don't know the answer, say that you don't know, but also suggest that the user can use the fan fiction function to generate fun stories. \"\n",
" \"Use 5 sentences maximum and keep the answer concise by also providing some background context of 1-2 sentences.\"\n",
" \"\\n\\n\"\n",
" \"{context}\"\n",
" )\n",
"\n",
" # Create the prompt template\n",
" prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", system_prompt),\n",
" (\"human\", \"{input}\"),\n",
" ]\n",
" )\n",
"\n",
" # Create the retriever and chains\n",
" retriever = vector_store.as_retriever()\n",
" question_answer_chain = create_stuff_documents_chain(llm, prompt)\n",
" rag_chain = create_retrieval_chain(retriever, question_answer_chain)\n",
"\n",
" # Define the input\n",
" input_data = {\"input\": user_query}\n",
"\n",
" # Invoke the RAG chain\n",
" response = rag_chain.invoke(input_data)\n",
"\n",
" # Print the answer\n",
" print(\"Answer:\", response[\"answer\"])\n",
"\n",
" # Prepare the data for the table\n",
" data = {\n",
" \"Doc ID\": [\n",
" doc.metadata.get(\"source\", \"N/A\").split(\"/\")[-1]\n",
" for doc in response[\"context\"]\n",
" ],\n",
" \"Content\": [\n",
" doc.page_content[:50] + \"...\"\n",
" if len(doc.page_content) > 100\n",
" else doc.page_content\n",
" for doc in response[\"context\"]\n",
" ],\n",
" }\n",
"\n",
" # Create a DataFrame\n",
" df = pd.DataFrame(data)\n",
"\n",
" # Print the table\n",
" print(\"\\nSources:\")\n",
" print(df.to_markdown(index=False))"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"azdata_cell_guid": "3cab0661-2351-4164-952f-67670addd99b",
"language": "python",
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Answer: When Harry first learned that he was a wizard, he felt quite sure there had been a horrible mistake. He struggled to believe it because he had spent his life being bullied and mistreated by the Dursleys. If he was really a wizard, he wondered why he hadn't been able to use magic to defend himself. This disbelief and surprise were evident when he gasped, “Im a what?”\n",
"\n",
"Sources:\n",
"| Doc ID | Content |\n",
"|:--------------------------------------------|:------------------------------------------------------|\n",
"| 01 Harry Potter and the Sorcerers Stone.txt | Harry was wondering what a wizard did once hed fi... |\n",
"| 01 Harry Potter and the Sorcerers Stone.txt | Harry realized his mouth was open and closed it qu... |\n",
"| 01 Harry Potter and the Sorcerers Stone.txt | “Most of us reckon hes still out there somewhere ... |\n",
"| 01 Harry Potter and the Sorcerers Stone.txt | “Ah, go boil yer heads, both of yeh,” said Hagrid.... |\n",
"\n"
]
}
],
"source": [
"# Define the user query\n",
"user_query = \"How did Harry feel when he first learnt that he was a Wizard?\"\n",
"\n",
"# Call the function to get the answer and sources\n",
"get_answer_and_sources(user_query)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"azdata_cell_guid": "1e1939d8-671f-4063-906c-89ee6813f12b",
"language": "python"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Yes, Harry had a pet owl named Hedwig. He decided to call her Hedwig after finding the name in a book titled *A History of Magic*.\n",
"\n",
"Sources:\n",
"| Doc ID | Content |\n",
"|:--------------------------------------------|:------------------------------------------------------|\n",
"| 01 Harry Potter and the Sorcerers Stone.txt | Harry sank down next to the bowl of peas. “What di... |\n",
"| 01 Harry Potter and the Sorcerers Stone.txt | Harry kept to his room, with his new owl for compa... |\n",
"| 01 Harry Potter and the Sorcerers Stone.txt | As the snake slid swiftly past him, Harry could ha... |\n",
"| 01 Harry Potter and the Sorcerers Stone.txt | Ron reached inside his jacket and pulled out a fat... |\n",
"\n"
]
}
],
"source": [
"# Define the user query\n",
"user_query = \"Did Harry have a pet? What was it\"\n",
"\n",
"# Call the function to get the answer and sources\n",
"get_answer_and_sources(user_query)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "d1f01a01-1e1d-4af6-95a3-82bad34419fe"
},
"source": [
"## API reference \n",
"\n",
"For detailed documentation of SQLServer Vectorstore features and configurations head to the API reference: [https://python.langchain.com/api\\_reference/sqlserver/index.html](https:\\python.langchain.com\\api_reference\\sqlserver\\index.html)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"azdata_cell_guid": "f04dd9d6-d4f2-4425-9c6c-2275ff65c594"
},
"source": [
"## Related\n",
"- Vector store [conceptual guide](https://python.langchain.com/docs/concepts/vectorstores/)\n",
"- Vector store [how-to guides](https://python.langchain.com/docs/how_to/#vector-stores)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -552,7 +552,7 @@
"id": "66690c78",
"metadata": {},
"source": [
"A known limitation of large language models (LLMs) is that their training data can be outdated, or not include the specific domain knowledge that you require.\n",
"A known limitation of large languag models (LLMs) is that their training data can be outdated, or not include the specific domain knowledge that you require.\n",
"\n",
"Take a look at the example below:"
]

View File

@@ -11,7 +11,7 @@ LangChain simplifies every stage of the LLM application lifecycle:
- **Development**: Build your applications using LangChain's open-source [building blocks](/docs/concepts/lcel), [components](/docs/concepts), and [third-party integrations](/docs/integrations/providers/).
Use [LangGraph](/docs/concepts/architecture/#langgraph) to build stateful agents with first-class streaming and human-in-the-loop support.
- **Productionization**: Use [LangSmith](https://docs.smith.langchain.com/) to inspect, monitor and evaluate your chains, so that you can continuously optimize and deploy with confidence.
- **Deployment**: Turn your LangGraph applications into production-ready APIs and Assistants with [LangGraph Platform](https://langchain-ai.github.io/langgraph/cloud/).
- **Deployment**: Turn your LangGraph applications into production-ready APIs and Assistants with [LangGraph Cloud](https://langchain-ai.github.io/langgraph/cloud/).
import ThemedImage from '@theme/ThemedImage';
import useBaseUrl from '@docusaurus/useBaseUrl';
@@ -29,11 +29,11 @@ import useBaseUrl from '@docusaurus/useBaseUrl';
Concretely, the framework consists of the following open-source libraries:
- **`langchain-core`**: Base abstractions and LangChain Expression Language.
- **Integration packages** (e.g. `langchain-openai`, `langchain-anthropic`, etc.): Important integrations have been split into lightweight packages that are co-maintained by the LangChain team and the integration developers.
- Integration packages (e.g. **`langchain-openai`**, **`langchain-anthropic`**, etc.): Important integrations have been split into lightweight packages that are co-maintained by the LangChain team and the integration developers.
- **`langchain`**: Chains, agents, and retrieval strategies that make up an application's cognitive architecture.
- **`langchain-community`**: Third-party integrations that are community maintained.
- **[LangGraph](https://langchain-ai.github.io/langgraph)**: Build robust and stateful multi-actor applications with LLMs by modeling steps as edges and nodes in a graph. Integrates smoothly with LangChain, but can be used without it. To learn more about LangGraph, check out our first LangChain Academy course, *Introduction to LangGraph*, available [here](https://academy.langchain.com/courses/intro-to-langgraph).
- **[LangGraph Platform](https://langchain-ai.github.io/langgraph/concepts/#langgraph-platform)**: Deploy LLM applications built with LangGraph to production.
- **[LangGraph](https://langchain-ai.github.io/langgraph)**: Build robust and stateful multi-actor applications with LLMs by modeling steps as edges and nodes in a graph. Integrates smoothly with LangChain, but can be used without it.
- **[LangGraphPlatform](https://langchain-ai.github.io/langgraph/concepts/#langgraph-platform)**: Deploy LLM applications built with LangGraph to production.
- **[LangSmith](https://docs.smith.langchain.com)**: A developer platform that lets you debug, test, evaluate, and monitor LLM applications.

File diff suppressed because one or more lines are too long

View File

@@ -35,7 +35,6 @@
"json-loader": "^0.5.7",
"prism-react-renderer": "^2.1.0",
"process": "^0.11.10",
"raw-loader": "^4.0.2",
"react": "^18",
"react-dom": "^18",
"typescript": "^5.2.2",

View File

@@ -25,6 +25,8 @@ NOTEBOOKS_NO_EXECUTION = [
"docs/docs/how_to/example_selectors_langsmith.ipynb", # TODO: add langchain-benchmarks; fix cassette issue
"docs/docs/how_to/extraction_long_text.ipynb", # Non-determinism due to batch
"docs/docs/how_to/graph_constructing.ipynb", # Requires local neo4j
"docs/docs/how_to/graph_mapping.ipynb", # Requires local neo4j
"docs/docs/how_to/graph_prompting.ipynb", # Requires local neo4j
"docs/docs/how_to/graph_semantic.ipynb", # Requires local neo4j
"docs/docs/how_to/hybrid.ipynb", # Requires AstraDB instance
"docs/docs/how_to/indexing.ipynb", # Requires local Elasticsearch

View File

@@ -1138,20 +1138,7 @@ const FEATURE_TABLES = {
multiTenancy: true,
local: true,
idsInAddDocuments: false,
},
{
name: "SQLServer",
link: "sqlserver",
deleteById: true,
filtering: true,
searchByVector: true,
searchWithScore: true,
async: false,
passesStandardTests: false,
multiTenancy: false,
local: false,
idsInAddDocuments: false,
},
}
],
}
};

Binary file not shown.

Before

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 117 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 137 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 251 KiB

View File

@@ -62,14 +62,6 @@
"source": "/docs/tutorials/local_rag",
"destination": "/docs/tutorials/rag"
},
{
"source": "/docs/how_to/graph_mapping(/?)",
"destination": "/docs/tutorials/graph#query-validation"
},
{
"source": "/docs/how_to/graph_prompting(/?)",
"destination": "/docs/tutorials/graph#few-shot-prompting"
},
{
"source": "/docs/tutorials/data_generation",
"destination": "https://python.langchain.com/v0.2/docs/tutorials/data_generation/"
@@ -121,10 +113,6 @@
{
"source": "/docs/contributing/:path((?:faq|repo_structure|review_process)/?)",
"destination": "/docs/contributing/reference/:path"
},
{
"source": "/docs/integrations/retrievers/weaviate-hybrid(/?)",
"destination": "/docs/integrations/vectorstores/weaviate/#search-mechanism"
}
]
}

View File

@@ -9043,14 +9043,6 @@ raw-body@2.5.2:
iconv-lite "0.4.24"
unpipe "1.0.0"
raw-loader@^4.0.2:
version "4.0.2"
resolved "https://registry.yarnpkg.com/raw-loader/-/raw-loader-4.0.2.tgz#1aac6b7d1ad1501e66efdac1522c73e59a584eb6"
integrity sha512-ZnScIV3ag9A4wPX/ZayxL/jZH+euYb6FcUinPcgiQW0+UBtEv0O6Q3lGd3cqJ+GHH+rksEv3Pj99oxJ3u3VIKA==
dependencies:
loader-utils "^2.0.0"
schema-utils "^3.0.0"
rc@1.2.8:
version "1.2.8"
resolved "https://registry.yarnpkg.com/rc/-/rc-1.2.8.tgz#cd924bf5200a075b83c188cd6b9e211b7fc0d3ed"

View File

@@ -45,4 +45,5 @@ _e2e_test:
poetry run pip install -e ../../../standard-tests && \
make format lint tests && \
poetry install --with test_integration && \
rm tests/integration_tests/test_vectorstores.py && \
make integration_test

View File

@@ -2,23 +2,23 @@
from __future__ import annotations
import uuid
from typing import (
TYPE_CHECKING,
Any,
Callable,
Iterator,
Iterable,
List,
Optional,
Sequence,
Tuple,
Type,
TypeVar,
)
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore
from langchain_core.vectorstores.utils import _cosine_similarity as cosine_similarity
if TYPE_CHECKING:
from langchain_core.documents import Document
VST = TypeVar("VST", bound=VectorStore)
@@ -158,184 +158,40 @@ class __ModuleName__VectorStore(VectorStore):
""" # noqa: E501
def __init__(self, embedding: Embeddings) -> None:
"""Initialize with the given embedding function.
_database: dict[str, tuple[Document, list[float]]] = {}
Args:
embedding: embedding function to use.
"""
self._database: dict[str, dict[str, Any]] = {}
self.embedding = embedding
@classmethod
def from_texts(
cls: Type[__ModuleName__VectorStore],
texts: List[str],
embedding: Embeddings,
def add_texts(
self,
texts: Iterable[str],
metadatas: Optional[List[dict]] = None,
**kwargs: Any,
) -> __ModuleName__VectorStore:
store = cls(
embedding=embedding,
)
store.add_texts(texts=texts, metadatas=metadatas, **kwargs)
return store
) -> List[str]:
raise NotImplementedError
# optional: add custom async implementations
# @classmethod
# async def afrom_texts(
# cls: Type[VST],
# texts: List[str],
# embedding: Embeddings,
# async def aadd_texts(
# self,
# texts: Iterable[str],
# metadatas: Optional[List[dict]] = None,
# **kwargs: Any,
# ) -> VST:
# ) -> List[str]:
# return await asyncio.get_running_loop().run_in_executor(
# None, partial(cls.from_texts, **kwargs), texts, embedding, metadatas
# None, partial(self.add_texts, **kwargs), texts, metadatas
# )
@property
def embeddings(self) -> Embeddings:
return self.embedding
def add_documents(
self,
documents: List[Document],
ids: Optional[List[str]] = None,
**kwargs: Any,
) -> List[str]:
"""Add documents to the store."""
texts = [doc.page_content for doc in documents]
vectors = self.embedding.embed_documents(texts)
if ids and len(ids) != len(texts):
msg = (
f"ids must be the same length as texts. "
f"Got {len(ids)} ids and {len(texts)} texts."
)
raise ValueError(msg)
id_iterator: Iterator[Optional[str]] = (
iter(ids) if ids else iter(doc.id for doc in documents)
)
ids_ = []
for doc, vector in zip(documents, vectors):
doc_id = next(id_iterator)
doc_id_ = doc_id if doc_id else str(uuid.uuid4())
ids_.append(doc_id_)
self._database[doc_id_] = {
"id": doc_id_,
"vector": vector,
"text": doc.page_content,
"metadata": doc.metadata,
}
return ids_
# optional: add custom async implementations
# async def aadd_documents(
# self,
# documents: List[Document],
# ids: Optional[List[str]] = None,
# **kwargs: Any,
# ) -> List[str]:
# raise NotImplementedError
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
if ids:
for _id in ids:
self._database.pop(_id, None)
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
raise NotImplementedError
# optional: add custom async implementations
# async def adelete(
# self, ids: Optional[List[str]] = None, **kwargs: Any
# ) -> None:
# ) -> Optional[bool]:
# raise NotImplementedError
def get_by_ids(self, ids: Sequence[str], /) -> list[Document]:
"""Get documents by their ids.
Args:
ids: The ids of the documents to get.
Returns:
A list of Document objects.
"""
documents = []
for doc_id in ids:
doc = self._database.get(doc_id)
if doc:
documents.append(
Document(
id=doc["id"],
page_content=doc["text"],
metadata=doc["metadata"],
)
)
return documents
# optional: add custom async implementations
# async def aget_by_ids(self, ids: Sequence[str], /) -> list[Document]:
# raise NotImplementedError
# NOTE: the below helper method implements similarity search for in-memory
# storage. It is optional and not a part of the vector store interface.
def _similarity_search_with_score_by_vector(
self,
embedding: List[float],
k: int = 4,
filter: Optional[Callable[[Document], bool]] = None,
**kwargs: Any,
) -> List[tuple[Document, float, List[float]]]:
# get all docs with fixed order in list
docs = list(self._database.values())
if filter is not None:
docs = [
doc
for doc in docs
if filter(Document(page_content=doc["text"], metadata=doc["metadata"]))
]
if not docs:
return []
similarity = cosine_similarity([embedding], [doc["vector"] for doc in docs])[0]
# get the indices ordered by similarity score
top_k_idx = similarity.argsort()[::-1][:k]
return [
(
# Document
Document(
id=doc_dict["id"],
page_content=doc_dict["text"],
metadata=doc_dict["metadata"],
),
# Score
float(similarity[idx].item()),
# Embedding vector
doc_dict["vector"],
)
for idx in top_k_idx
# Assign using walrus operator to avoid multiple lookups
if (doc_dict := docs[idx])
]
def similarity_search(
self, query: str, k: int = 4, **kwargs: Any
) -> List[Document]:
embedding = self.embedding.embed_query(query)
return [
doc
for doc, _, _ in self._similarity_search_with_score_by_vector(
embedding=embedding, k=k, **kwargs
)
]
raise NotImplementedError
# optional: add custom async implementations
# async def asimilarity_search(
@@ -348,15 +204,9 @@ class __ModuleName__VectorStore(VectorStore):
# return await asyncio.get_event_loop().run_in_executor(None, func)
def similarity_search_with_score(
self, query: str, k: int = 4, **kwargs: Any
self, *args: Any, **kwargs: Any
) -> List[Tuple[Document, float]]:
embedding = self.embedding.embed_query(query)
return [
(doc, similarity)
for doc, similarity, _ in self._similarity_search_with_score_by_vector(
embedding=embedding, k=k, **kwargs
)
]
raise NotImplementedError
# optional: add custom async implementations
# async def asimilarity_search_with_score(
@@ -368,12 +218,10 @@ class __ModuleName__VectorStore(VectorStore):
# func = partial(self.similarity_search_with_score, *args, **kwargs)
# return await asyncio.get_event_loop().run_in_executor(None, func)
### ADDITIONAL OPTIONAL SEARCH METHODS BELOW ###
# def similarity_search_by_vector(
# self, embedding: List[float], k: int = 4, **kwargs: Any
# ) -> List[Document]:
# raise NotImplementedError
def similarity_search_by_vector(
self, embedding: List[float], k: int = 4, **kwargs: Any
) -> List[Document]:
raise NotImplementedError
# optional: add custom async implementations
# async def asimilarity_search_by_vector(
@@ -385,15 +233,15 @@ class __ModuleName__VectorStore(VectorStore):
# func = partial(self.similarity_search_by_vector, embedding, k=k, **kwargs)
# return await asyncio.get_event_loop().run_in_executor(None, func)
# def max_marginal_relevance_search(
# self,
# query: str,
# k: int = 4,
# fetch_k: int = 20,
# lambda_mult: float = 0.5,
# **kwargs: Any,
# ) -> List[Document]:
# raise NotImplementedError
def max_marginal_relevance_search(
self,
query: str,
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
**kwargs: Any,
) -> List[Document]:
raise NotImplementedError
# optional: add custom async implementations
# async def amax_marginal_relevance_search(
@@ -417,15 +265,15 @@ class __ModuleName__VectorStore(VectorStore):
# )
# return await asyncio.get_event_loop().run_in_executor(None, func)
# def max_marginal_relevance_search_by_vector(
# self,
# embedding: List[float],
# k: int = 4,
# fetch_k: int = 20,
# lambda_mult: float = 0.5,
# **kwargs: Any,
# ) -> List[Document]:
# raise NotImplementedError
def max_marginal_relevance_search_by_vector(
self,
embedding: List[float],
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
**kwargs: Any,
) -> List[Document]:
raise NotImplementedError
# optional: add custom async implementations
# async def amax_marginal_relevance_search_by_vector(
@@ -437,3 +285,29 @@ class __ModuleName__VectorStore(VectorStore):
# **kwargs: Any,
# ) -> List[Document]:
# raise NotImplementedError
@classmethod
def from_texts(
cls: Type[VST],
texts: List[str],
embedding: Embeddings,
metadatas: Optional[List[dict]] = None,
**kwargs: Any,
) -> VST:
raise NotImplementedError
# optional: add custom async implementations
# @classmethod
# async def afrom_texts(
# cls: Type[VST],
# texts: List[str],
# embedding: Embeddings,
# metadatas: Optional[List[dict]] = None,
# **kwargs: Any,
# ) -> VST:
# return await asyncio.get_running_loop().run_in_executor(
# None, partial(cls.from_texts, **kwargs), texts, embedding, metadatas
# )
def _select_relevance_score_fn(self) -> Callable[[float], float]:
raise NotImplementedError

View File

@@ -19,6 +19,6 @@ class Test__ModuleName__Retriever(RetrieversIntegrationTests):
@property
def retriever_query_example(self) -> str:
"""
Returns a str representing the "query" of an example retriever call.
Returns a dictionary representing the "args" of an example retriever call.
"""
return "example query"

View File

@@ -1,16 +1,33 @@
from typing import Generator
from typing import AsyncGenerator, Generator
import pytest
from __module_name__.vectorstores import __ModuleName__VectorStore
from langchain_core.vectorstores import VectorStore
from langchain_tests.integration_tests import VectorStoreIntegrationTests
from langchain_tests.integration_tests import (
AsyncReadWriteTestSuite,
ReadWriteTestSuite,
)
class Test__ModuleName__VectorStore(VectorStoreIntegrationTests):
class Test__ModuleName__VectorStoreSync(ReadWriteTestSuite):
@pytest.fixture()
def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore
"""Get an empty vectorstore for unit tests."""
store = __ModuleName__VectorStore(self.get_embeddings())
store = __ModuleName__VectorStore()
# note: store should be EMPTY at this point
# if you need to delete data, you may do so here
try:
yield store
finally:
# cleanup operations, or deleting data
pass
class Test__ModuleName__VectorStoreAsync(AsyncReadWriteTestSuite):
@pytest.fixture()
async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore
"""Get an empty vectorstore for unit tests."""
store = __ModuleName__VectorStore()
# note: store should be EMPTY at this point
# if you need to delete data, you may do so here
try:

View File

@@ -5,7 +5,6 @@ Manage LangChain apps
import shutil
import subprocess
import sys
import warnings
from pathlib import Path
from typing import Dict, List, Optional, Tuple
@@ -164,12 +163,6 @@ def add(
langchain app add git+ssh://git@github.com/efriis/simple-pirate.git
"""
if not branch and not repo:
warnings.warn(
"Adding templates from the default branch and repo is deprecated."
" At a minimum, you will have to add `--branch v0.2` for this to work"
)
parsed_deps = parse_dependencies(dependencies, repo, branch, api_path)
project_root = get_package_root(project_dir)

View File

@@ -30,12 +30,10 @@ MODEL_COST_PER_1K_TOKENS = {
"gpt-4o": 0.0025,
"gpt-4o-2024-05-13": 0.005,
"gpt-4o-2024-08-06": 0.0025,
"gpt-4o-2024-11-20": 0.0025,
# GPT-4o output
"gpt-4o-completion": 0.01,
"gpt-4o-2024-05-13-completion": 0.015,
"gpt-4o-2024-08-06-completion": 0.01,
"gpt-4o-2024-11-20-completion": 0.01,
# GPT-4 input
"gpt-4": 0.03,
"gpt-4-0314": 0.03,

View File

@@ -1,316 +0,0 @@
"""Question answering over a graph."""
from __future__ import annotations
import re
from typing import Any, Dict, List, Optional, Union
from langchain.chains.base import Chain
from langchain_core.callbacks import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.messages import (
AIMessage,
BaseMessage,
SystemMessage,
ToolMessage,
)
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import (
BasePromptTemplate,
ChatPromptTemplate,
HumanMessagePromptTemplate,
MessagesPlaceholder,
)
from langchain_core.runnables import Runnable
from pydantic import Field
from langchain_community.chains.graph_qa.prompts import (
MEMGRAPH_GENERATION_PROMPT,
MEMGRAPH_QA_PROMPT,
)
from langchain_community.graphs.memgraph_graph import MemgraphGraph
INTERMEDIATE_STEPS_KEY = "intermediate_steps"
FUNCTION_RESPONSE_SYSTEM = """You are an assistant that helps to form nice and human
understandable answers based on the provided information from tools.
Do not add any other information that wasn't present in the tools, and use
very concise style in interpreting results!
"""
def extract_cypher(text: str) -> str:
"""Extract Cypher code from a text.
Args:
text: Text to extract Cypher code from.
Returns:
Cypher code extracted from the text.
"""
# The pattern to find Cypher code enclosed in triple backticks
pattern = r"```(.*?)```"
# Find all matches in the input text
matches = re.findall(pattern, text, re.DOTALL)
return matches[0] if matches else text
def get_function_response(
question: str, context: List[Dict[str, Any]]
) -> List[BaseMessage]:
TOOL_ID = "call_H7fABDuzEau48T10Qn0Lsh0D"
messages = [
AIMessage(
content="",
additional_kwargs={
"tool_calls": [
{
"id": TOOL_ID,
"function": {
"arguments": '{"question":"' + question + '"}',
"name": "GetInformation",
},
"type": "function",
}
]
},
),
ToolMessage(content=str(context), tool_call_id=TOOL_ID),
]
return messages
class MemgraphQAChain(Chain):
"""Chain for question-answering against a graph by generating Cypher statements.
*Security note*: Make sure that the database connection uses credentials
that are narrowly-scoped to only include necessary permissions.
Failure to do so may result in data corruption or loss, since the calling
code may attempt commands that would result in deletion, mutation
of data if appropriately prompted or reading sensitive data if such
data is present in the database.
The best way to guard against such negative outcomes is to (as appropriate)
limit the permissions granted to the credentials used with this tool.
See https://python.langchain.com/docs/security for more information.
"""
graph: MemgraphGraph = Field(exclude=True)
cypher_generation_chain: Runnable
qa_chain: Runnable
graph_schema: str
input_key: str = "query" #: :meta private:
output_key: str = "result" #: :meta private:
top_k: int = 10
"""Number of results to return from the query"""
return_intermediate_steps: bool = False
"""Whether or not to return the intermediate steps along with the final answer."""
return_direct: bool = False
"""Optional cypher validation tool"""
use_function_response: bool = False
"""Whether to wrap the database context as tool/function response"""
allow_dangerous_requests: bool = False
"""Forced user opt-in to acknowledge that the chain can make dangerous requests.
*Security note*: Make sure that the database connection uses credentials
that are narrowly-scoped to only include necessary permissions.
Failure to do so may result in data corruption or loss, since the calling
code may attempt commands that would result in deletion, mutation
of data if appropriately prompted or reading sensitive data if such
data is present in the database.
The best way to guard against such negative outcomes is to (as appropriate)
limit the permissions granted to the credentials used with this tool.
See https://python.langchain.com/docs/security for more information.
"""
def __init__(self, **kwargs: Any) -> None:
"""Initialize the chain."""
super().__init__(**kwargs)
if self.allow_dangerous_requests is not True:
raise ValueError(
"In order to use this chain, you must acknowledge that it can make "
"dangerous requests by setting `allow_dangerous_requests` to `True`."
"You must narrowly scope the permissions of the database connection "
"to only include necessary permissions. Failure to do so may result "
"in data corruption or loss or reading sensitive data if such data is "
"present in the database."
"Only use this chain if you understand the risks and have taken the "
"necessary precautions. "
"See https://python.langchain.com/docs/security for more information."
)
@property
def input_keys(self) -> List[str]:
"""Return the input keys.
:meta private:
"""
return [self.input_key]
@property
def output_keys(self) -> List[str]:
"""Return the output keys.
:meta private:
"""
_output_keys = [self.output_key]
return _output_keys
@property
def _chain_type(self) -> str:
return "graph_cypher_chain"
@classmethod
def from_llm(
cls,
llm: Optional[BaseLanguageModel] = None,
*,
qa_prompt: Optional[BasePromptTemplate] = None,
cypher_prompt: Optional[BasePromptTemplate] = None,
cypher_llm: Optional[BaseLanguageModel] = None,
qa_llm: Optional[Union[BaseLanguageModel, Any]] = None,
qa_llm_kwargs: Optional[Dict[str, Any]] = None,
cypher_llm_kwargs: Optional[Dict[str, Any]] = None,
use_function_response: bool = False,
function_response_system: str = FUNCTION_RESPONSE_SYSTEM,
**kwargs: Any,
) -> MemgraphQAChain:
"""Initialize from LLM."""
if not cypher_llm and not llm:
raise ValueError("Either `llm` or `cypher_llm` parameters must be provided")
if not qa_llm and not llm:
raise ValueError("Either `llm` or `qa_llm` parameters must be provided")
if cypher_llm and qa_llm and llm:
raise ValueError(
"You can specify up to two of 'cypher_llm', 'qa_llm'"
", and 'llm', but not all three simultaneously."
)
if cypher_prompt and cypher_llm_kwargs:
raise ValueError(
"Specifying cypher_prompt and cypher_llm_kwargs together is"
" not allowed. Please pass prompt via cypher_llm_kwargs."
)
if qa_prompt and qa_llm_kwargs:
raise ValueError(
"Specifying qa_prompt and qa_llm_kwargs together is"
" not allowed. Please pass prompt via qa_llm_kwargs."
)
use_qa_llm_kwargs = qa_llm_kwargs if qa_llm_kwargs is not None else {}
use_cypher_llm_kwargs = (
cypher_llm_kwargs if cypher_llm_kwargs is not None else {}
)
if "prompt" not in use_qa_llm_kwargs:
use_qa_llm_kwargs["prompt"] = (
qa_prompt if qa_prompt is not None else MEMGRAPH_QA_PROMPT
)
if "prompt" not in use_cypher_llm_kwargs:
use_cypher_llm_kwargs["prompt"] = (
cypher_prompt
if cypher_prompt is not None
else MEMGRAPH_GENERATION_PROMPT
)
qa_llm = qa_llm or llm
if use_function_response:
try:
qa_llm.bind_tools({}) # type: ignore[union-attr]
response_prompt = ChatPromptTemplate.from_messages(
[
SystemMessage(content=function_response_system),
HumanMessagePromptTemplate.from_template("{question}"),
MessagesPlaceholder(variable_name="function_response"),
]
)
qa_chain = response_prompt | qa_llm | StrOutputParser() # type: ignore
except (NotImplementedError, AttributeError):
raise ValueError("Provided LLM does not support native tools/functions")
else:
qa_chain = use_qa_llm_kwargs["prompt"] | qa_llm | StrOutputParser() # type: ignore
prompt = use_cypher_llm_kwargs["prompt"]
llm_to_use = cypher_llm if cypher_llm is not None else llm
if prompt is not None and llm_to_use is not None:
cypher_generation_chain = prompt | llm_to_use | StrOutputParser() # type: ignore[arg-type]
else:
raise ValueError(
"Missing required components for the cypher generation chain: "
"'prompt' or 'llm'"
)
graph_schema = kwargs["graph"].get_schema
return cls(
graph_schema=graph_schema,
qa_chain=qa_chain,
cypher_generation_chain=cypher_generation_chain,
use_function_response=use_function_response,
**kwargs,
)
def _call(
self,
inputs: Dict[str, Any],
run_manager: Optional[CallbackManagerForChainRun] = None,
) -> Dict[str, Any]:
"""Generate Cypher statement, use it to look up in db and answer question."""
_run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
callbacks = _run_manager.get_child()
question = inputs[self.input_key]
args = {
"question": question,
"schema": self.graph_schema,
}
args.update(inputs)
intermediate_steps: List = []
generated_cypher = self.cypher_generation_chain.invoke(
args, callbacks=callbacks
)
# Extract Cypher code if it is wrapped in backticks
generated_cypher = extract_cypher(generated_cypher)
_run_manager.on_text("Generated Cypher:", end="\n", verbose=self.verbose)
_run_manager.on_text(
generated_cypher, color="green", end="\n", verbose=self.verbose
)
intermediate_steps.append({"query": generated_cypher})
# Retrieve and limit the number of results
# Generated Cypher can be null if the query corrector identifies an invalid schema
if generated_cypher:
context = self.graph.query(generated_cypher)[: self.top_k]
else:
context = []
if self.return_direct:
result = context
else:
_run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
_run_manager.on_text(
str(context), color="green", end="\n", verbose=self.verbose
)
intermediate_steps.append({"context": context})
if self.use_function_response:
function_response = get_function_response(question, context)
result = self.qa_chain.invoke( # type: ignore
{"question": question, "function_response": function_response},
)
else:
result = self.qa_chain.invoke( # type: ignore
{"question": question, "context": context},
callbacks=callbacks,
)
chain_result: Dict[str, Any] = {"result": result}
if self.return_intermediate_steps:
chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps
return chain_result
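For orientation, here is a minimal usage sketch of the chain above. It assumes a running Memgraph instance, that the default input key is "query", and an illustrative OpenAI model; none of these specifics come from the diff itself.

# Hedged usage sketch -- connection details, model name, and input key are assumptions.
from langchain_openai import ChatOpenAI
from langchain_community.graphs import MemgraphGraph

graph = MemgraphGraph(url="bolt://localhost:7687", username="", password="")
chain = MemgraphQAChain.from_llm(
    ChatOpenAI(temperature=0),
    graph=graph,
    allow_dangerous_requests=True,  # explicit opt-in required by __init__ above
    return_intermediate_steps=True,
)
result = chain.invoke({"query": "Which movies did Charlie Chaplin direct?"})
print(result["result"])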

View File

@@ -411,58 +411,3 @@ NEPTUNE_OPENCYPHER_GENERATION_SIMPLE_PROMPT = PromptTemplate(
input_variables=["schema", "question", "extra_instructions"],
template=NEPTUNE_OPENCYPHER_GENERATION_SIMPLE_TEMPLATE,
)
MEMGRAPH_GENERATION_TEMPLATE = """Your task is to directly translate natural language inquiry into precise and executable Cypher query for Memgraph database.
You will utilize a provided database schema to understand the structure, nodes and relationships within the Memgraph database.
Instructions:
- Use provided node and relationship labels and property names from the
schema which describes the database's structure. Upon receiving a user
question, synthesize the schema to craft a precise Cypher query that
directly corresponds to the user's intent.
- Generate valid executable Cypher queries on top of Memgraph database.
Any explanation, context, or additional information that is not a part
of the Cypher query syntax should be omitted entirely.
- Use Memgraph MAGE procedures instead of Neo4j APOC procedures.
- Do not include any explanations or apologies in your responses.
- Do not include any text except the generated Cypher statement.
- For queries that ask for information or functionalities outside the direct
generation of Cypher queries, use the Cypher query format to communicate
limitations or capabilities. For example: RETURN "I am designed to generate
Cypher queries based on the provided schema only."
Schema:
{schema}
With all the above information and instructions, generate Cypher query for the
user question.
The question is:
{question}"""
MEMGRAPH_GENERATION_PROMPT = PromptTemplate(
input_variables=["schema", "question"], template=MEMGRAPH_GENERATION_TEMPLATE
)
MEMGRAPH_QA_TEMPLATE = """Your task is to form nice and human
understandable answers. The information part contains the provided
information that you must use to construct an answer.
The provided information is authoritative, you must never doubt it or try to
use your internal knowledge to correct it. Make the answer sound as a
response to the question. Do not mention that you based the result on the
given information. Here is an example:
Question: Which managers own Neo4j stocks?
Context:[manager:CTL LLC, manager:JANE STREET GROUP LLC]
Helpful Answer: CTL LLC, JANE STREET GROUP LLC own Neo4j stocks.
Follow this example when generating answers. If the provided information is
empty, say that you don't know the answer.
Information:
{context}
Question: {question}
Helpful Answer:"""
MEMGRAPH_QA_PROMPT = PromptTemplate(
input_variables=["context", "question"], template=MEMGRAPH_QA_TEMPLATE
)
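As a quick illustration of how these templates are consumed, the QA prompt can be rendered directly; the context and question below are made up.

# Illustrative rendering of the QA prompt; values are invented.
rendered = MEMGRAPH_QA_PROMPT.format(
    context="[manager:CTL LLC, manager:JANE STREET GROUP LLC]",
    question="Which managers own Neo4j stocks?",
)
print(rendered)  # the full instruction text with context and question filled in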

View File

@@ -27,9 +27,8 @@ logger = logging.getLogger(__name__)
PINECONE = "Pinecone"
QDRANT = "Qdrant"
PGVECTOR = "PGVector"
PINECONE_VECTOR_STORE = "PineconeVectorStore"
SUPPORTED_VECTORSTORES = {PINECONE, QDRANT, PGVECTOR, PINECONE_VECTOR_STORE}
SUPPORTED_VECTORSTORES = {PINECONE, QDRANT, PGVECTOR}
def clear_enforcement_filters(retriever: VectorStoreRetriever) -> None:
@@ -506,7 +505,7 @@ def _set_identity_enforcement_filter(
of the retriever based on the type of the vectorstore.
"""
search_kwargs = retriever.search_kwargs
if retriever.vectorstore.__class__.__name__ in [PINECONE, PINECONE_VECTOR_STORE]:
if retriever.vectorstore.__class__.__name__ == PINECONE:
_apply_pinecone_authorization_filter(search_kwargs, auth_context)
elif retriever.vectorstore.__class__.__name__ == QDRANT:
_apply_qdrant_authorization_filter(search_kwargs, auth_context)
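The dispatch above keys purely off the vectorstore's class name. A small sketch of what the Pinecone branch effectively does to `search_kwargs`; the filter key and identity values are assumptions for illustration only.

# Hedged sketch: merge an identity filter into the retriever's search kwargs.
search_kwargs = {"k": 4}
auth_filter = {"authorized_identities": {"$in": ["user@example.com"]}}  # assumed shape
existing = search_kwargs.get("filter", {})
search_kwargs["filter"] = {**existing, **auth_filter}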

View File

@@ -11,7 +11,6 @@ from typing import (
Dict,
Iterator,
List,
Literal,
Mapping,
Optional,
Sequence,
@@ -213,33 +212,6 @@ def _convert_message_to_dict(message: BaseMessage) -> dict:
return message_dict
_OPENAI_MODELS = [
"o1-mini",
"o1-preview",
"gpt-4o-mini",
"gpt-4o-mini-2024-07-18",
"gpt-4o",
"gpt-4o-2024-08-06",
"gpt-4o-2024-05-13",
"gpt-4-turbo",
"gpt-4-turbo-preview",
"gpt-4-0125-preview",
"gpt-4-1106-preview",
"gpt-3.5-turbo-1106",
"gpt-3.5-turbo",
"gpt-3.5-turbo-0301",
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-16k-0613",
"gpt-4",
"gpt-4-0314",
"gpt-4-0613",
"gpt-4-32k",
"gpt-4-32k-0314",
"gpt-4-32k-0613",
]
class ChatLiteLLM(BaseChatModel):
"""Chat model that uses the LiteLLM API."""
@@ -493,9 +465,6 @@ class ChatLiteLLM(BaseChatModel):
def bind_tools(
self,
tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
tool_choice: Optional[
Union[dict, str, Literal["auto", "none", "required", "any"], bool]
] = None,
**kwargs: Any,
) -> Runnable[LanguageModelInput, BaseMessage]:
"""Bind tool-like objects to this chat model.
@@ -507,47 +476,17 @@ class ChatLiteLLM(BaseChatModel):
Can be a dictionary, pydantic model, callable, or BaseTool. Pydantic
models, callables, and BaseTools will be automatically converted to
their schema dictionary representation.
tool_choice: Which tool to require the model to call. Options are:
- str of the form ``"<<tool_name>>"``: calls <<tool_name>> tool.
- ``"auto"``:
automatically selects a tool (including no tool).
- ``"none"``:
does not call a tool.
- ``"any"`` or ``"required"`` or ``True``:
forces at least one tool to be called.
- dict of the form:
``{"type": "function", "function": {"name": <<tool_name>>}}``
- ``False`` or ``None``: no effect
tool_choice: Which tool to require the model to call.
Must be the name of the single provided function or
"auto" to automatically determine which function to call
(if any), or a dict of the form:
{"type": "function", "function": {"name": <<tool_name>>}}.
**kwargs: Any additional parameters to pass to the
:class:`~langchain.runnable.Runnable` constructor.
"""
formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
# In the case of OpenAI, if tool_choice is `any` or a bool was provided, we
# change it to `required`, as that is what OpenAI supports.
if (
(self.model is not None and "azure" in self.model)
or (self.model_name is not None and "azure" in self.model_name)
or (self.model is not None and self.model in _OPENAI_MODELS)
or (self.model_name is not None and self.model_name in _OPENAI_MODELS)
) and (tool_choice == "any" or isinstance(tool_choice, bool)):
tool_choice = "required"
# If tool_choice is bool apart from openai we make it `any`
elif isinstance(tool_choice, bool):
tool_choice = "any"
elif isinstance(tool_choice, dict):
tool_names = [
formatted_tool["function"]["name"] for formatted_tool in formatted_tools
]
if not any(
tool_name == tool_choice["function"]["name"] for tool_name in tool_names
):
raise ValueError(
f"Tool choice {tool_choice} was specified, but the only "
f"provided tools were {tool_names}."
)
return super().bind(tools=formatted_tools, tool_choice=tool_choice, **kwargs)
return super().bind(tools=formatted_tools, **kwargs)
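For reference, a hedged usage sketch against the richer signature shown above (the variant that still accepts `tool_choice`); the model name is illustrative.

# Hedged sketch: bind a pydantic tool schema to ChatLiteLLM.
from pydantic import BaseModel, Field

class GetWeather(BaseModel):
    """Get the current weather for a location."""

    location: str = Field(..., description="City name")

llm = ChatLiteLLM(model="gpt-4o-mini")  # illustrative model name
llm_with_tools = llm.bind_tools([GetWeather], tool_choice="auto")
msg = llm_with_tools.invoke("What is the weather in Paris?")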
@property
def _identifying_params(self) -> Dict[str, Any]:

View File

@@ -13,142 +13,21 @@ from langchain_community.llms.moonshot import MOONSHOT_SERVICE_URL_BASE, Moonsho
class MoonshotChat(MoonshotCommon, ChatOpenAI): # type: ignore[misc, override, override]
"""Moonshot chat model integration.
"""Moonshot large language models.
Setup:
Install ``openai`` and set environment variables ``MOONSHOT_API_KEY``.
.. code-block:: bash
pip install openai
export MOONSHOT_API_KEY="your-api-key"
To use, you should have the ``openai`` python package installed, and the
environment variable ``MOONSHOT_API_KEY`` set with your API key.
(Moonshot's chat API is compatible with OpenAI's SDK.)
Referenced from https://platform.moonshot.cn/docs
Key init args — completion params:
model: str
Name of Moonshot model to use.
temperature: float
Sampling temperature.
max_tokens: Optional[int]
Max number of tokens to generate.
Key init args — client params:
api_key: Optional[str]
Moonshot API KEY. If not passed in will be read from env var MOONSHOT_API_KEY.
api_base: Optional[str]
Base URL for API requests.
See full list of supported init args and their descriptions in the params section.
Instantiate:
Example:
.. code-block:: python
from langchain_community.chat_models import MoonshotChat
from langchain_community.chat_models.moonshot import MoonshotChat
chat = MoonshotChat(
temperature=0.5,
api_key="your-api-key",
model="moonshot-v1-8k",
# api_base="...",
# other params...
)
Invoke:
.. code-block:: python
messages = [
("system", "你是一名专业的翻译家,可以将用户的中文翻译为英文。"),
("human", "我喜欢编程。"),
]
chat.invoke(messages)
.. code-block:: python
AIMessage(
content='I like programming.',
additional_kwargs={},
response_metadata={
'token_usage': {
'completion_tokens': 5,
'prompt_tokens': 27,
'total_tokens': 32
},
'model_name': 'moonshot-v1-8k',
'system_fingerprint': None,
'finish_reason': 'stop',
'logprobs': None
},
id='run-71c03f4e-6628-41d5-beb6-d2559ae68266-0'
)
Stream:
.. code-block:: python
for chunk in chat.stream(messages):
print(chunk)
.. code-block:: python
content='' additional_kwargs={} response_metadata={} id='run-80d77096-8b83-4c39-a84d-71d9c746da92'
content='I' additional_kwargs={} response_metadata={} id='run-80d77096-8b83-4c39-a84d-71d9c746da92'
content=' like' additional_kwargs={} response_metadata={} id='run-80d77096-8b83-4c39-a84d-71d9c746da92'
content=' programming' additional_kwargs={} response_metadata={} id='run-80d77096-8b83-4c39-a84d-71d9c746da92'
content='.' additional_kwargs={} response_metadata={} id='run-80d77096-8b83-4c39-a84d-71d9c746da92'
content='' additional_kwargs={} response_metadata={'finish_reason': 'stop'} id='run-80d77096-8b83-4c39-a84d-71d9c746da92'
.. code-block:: python
stream = chat.stream(messages)
full = next(stream)
for chunk in stream:
full += chunk
full
.. code-block:: python
AIMessageChunk(
content='I like programming.',
additional_kwargs={},
response_metadata={'finish_reason': 'stop'},
id='run-10c80976-7aa5-4ff7-ba3e-1251665557ef'
)
Async:
.. code-block:: python
await chat.ainvoke(messages)
# stream:
# async for chunk in chat.astream(messages):
# print(chunk)
# batch:
# await chat.abatch([messages])
.. code-block:: python
[AIMessage(content='I like programming.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 27, 'total_tokens': 32}, 'model_name': 'moonshot-v1-8k', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-2938b005-9204-4b9f-b273-1c3272fce9e5-0')]
Response metadata
.. code-block:: python
ai_msg = chat.invoke(messages)
ai_msg.response_metadata
.. code-block:: python
{
'token_usage': {
'completion_tokens': 5,
'prompt_tokens': 27,
'total_tokens': 32
},
'model_name': 'moonshot-v1-8k',
'system_fingerprint': None,
'finish_reason': 'stop',
'logprobs': None
}
""" # noqa: E501
moonshot = MoonshotChat(model="moonshot-v1-8k")
"""
@pre_init
def validate_environment(cls, values: Dict) -> Dict:

View File

@@ -148,6 +148,7 @@ class ChatPerplexity(BaseChatModel):
def _default_params(self) -> Dict[str, Any]:
"""Get the default parameters for calling PerplexityChat API."""
return {
"request_timeout": self.request_timeout,
"max_tokens": self.max_tokens,
"stream": self.streaming,
"temperature": self.temperature,
@@ -221,7 +222,7 @@ class ChatPerplexity(BaseChatModel):
if stop:
params["stop_sequences"] = stop
stream_resp = self.client.chat.completions.create(
messages=message_dicts, stream=True, **params
model=params["model"], messages=message_dicts, stream=True
)
for chunk in stream_resp:
if not isinstance(chunk, dict):
@@ -257,7 +258,9 @@ class ChatPerplexity(BaseChatModel):
return generate_from_stream(stream_iter)
message_dicts, params = self._create_message_dicts(messages, stop)
params = {**params, **kwargs}
response = self.client.chat.completions.create(messages=message_dicts, **params)
response = self.client.chat.completions.create(
model=params["model"], messages=message_dicts
)
message = AIMessage(
content=response.choices[0].message.content,
additional_kwargs={"citations": response.citations},
@@ -268,6 +271,8 @@ class ChatPerplexity(BaseChatModel):
def _invocation_params(self) -> Mapping[str, Any]:
"""Get the parameters used to invoke the model."""
pplx_creds: Dict[str, Any] = {
"api_key": self.pplx_api_key,
"api_base": "https://api.perplexity.ai",
"model": self.model,
}
return {**pplx_creds, **self._default_params}
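A minimal invocation sketch for the parameters above; the API key and model name are placeholders, not values from this diff.

# Hedged usage sketch for ChatPerplexity.
chat = ChatPerplexity(
    model="llama-3.1-sonar-small-128k-online",  # illustrative model name
    pplx_api_key="pplx-...",  # placeholder
    temperature=0.7,
)
resp = chat.invoke("Summarize today's AI news in one sentence.")
print(resp.content)
print(resp.additional_kwargs.get("citations"))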

View File

@@ -5,9 +5,7 @@ from __future__ import annotations
import logging
import mimetypes
import os
import re
import tempfile
import urllib
from abc import abstractmethod
from pathlib import Path, PurePath
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Sequence, Union
@@ -188,18 +186,9 @@ class O365BaseLoader(BaseLoader, BaseModel):
for file in items:
if file.is_file:
if file.mime_type in list(file_mime_types.values()):
source = file.web_url
if re.search(
r"Doc.aspx\?sourcedoc=.*file=([^&]+)", file.web_url
):
source = (
file._parent.web_url
+ "/"
+ urllib.parse.quote(file.name)
)
file.download(to_path=temp_dir, chunk_size=self.chunk_size)
metadata_dict[file.name] = {
"source": source,
"source": file.web_url,
"mime_type": file.mime_type,
"created": str(file.created),
"modified": str(file.modified),
@@ -252,18 +241,9 @@ class O365BaseLoader(BaseLoader, BaseModel):
continue
if file.is_file:
if file.mime_type in list(file_mime_types.values()):
source = file.web_url
if re.search(
r"Doc.aspx\?sourcedoc=.*file=([^&]+)", file.web_url
):
source = (
file._parent.web_url
+ "/"
+ urllib.parse.quote(file.name)
)
file.download(to_path=temp_dir, chunk_size=self.chunk_size)
metadata_dict[file.name] = {
"source": source,
"source": file.web_url,
"mime_type": file.mime_type,
"created": file.created,
"modified": file.modified,

View File

@@ -166,7 +166,6 @@ class ConfluenceLoader(BaseLoader):
include_archived_content: bool = False,
include_attachments: bool = False,
include_comments: bool = False,
include_labels: bool = False,
content_format: ContentFormat = ContentFormat.STORAGE,
limit: Optional[int] = 50,
max_pages: Optional[int] = 1000,
@@ -182,7 +181,6 @@ class ConfluenceLoader(BaseLoader):
self.include_archived_content = include_archived_content
self.include_attachments = include_attachments
self.include_comments = include_comments
self.include_labels = include_labels
self.content_format = content_format
self.limit = limit
self.max_pages = max_pages
@@ -329,20 +327,12 @@ class ConfluenceLoader(BaseLoader):
)
include_attachments = self._resolve_param("include_attachments", kwargs)
include_comments = self._resolve_param("include_comments", kwargs)
include_labels = self._resolve_param("include_labels", kwargs)
content_format = self._resolve_param("content_format", kwargs)
limit = self._resolve_param("limit", kwargs)
max_pages = self._resolve_param("max_pages", kwargs)
ocr_languages = self._resolve_param("ocr_languages", kwargs)
keep_markdown_format = self._resolve_param("keep_markdown_format", kwargs)
keep_newlines = self._resolve_param("keep_newlines", kwargs)
expand = ",".join(
[
content_format.value,
"version",
*(["metadata.labels"] if include_labels else []),
]
)
if not space_key and not page_ids and not label and not cql:
raise ValueError(
@@ -357,14 +347,13 @@ class ConfluenceLoader(BaseLoader):
limit=limit,
max_pages=max_pages,
status="any" if include_archived_content else "current",
expand=expand,
expand=f"{content_format.value},version",
)
yield from self.process_pages(
pages,
include_restricted_content,
include_attachments,
include_comments,
include_labels,
content_format,
ocr_languages=ocr_languages,
keep_markdown_format=keep_markdown_format,
@@ -391,14 +380,13 @@ class ConfluenceLoader(BaseLoader):
limit=limit,
max_pages=max_pages,
include_archived_spaces=include_archived_content,
expand=expand,
expand=f"{content_format.value},version",
)
yield from self.process_pages(
pages,
include_restricted_content,
include_attachments,
include_comments,
False, # labels are not included in the search results
content_format,
ocr_languages,
keep_markdown_format,
@@ -420,8 +408,7 @@ class ConfluenceLoader(BaseLoader):
before_sleep=before_sleep_log(logger, logging.WARNING),
)(self.confluence.get_page_by_id)
page = get_page(
page_id=page_id,
expand=expand,
page_id=page_id, expand=f"{content_format.value},version"
)
if not include_restricted_content and not self.is_public_page(page):
continue
@@ -429,7 +416,6 @@ class ConfluenceLoader(BaseLoader):
page,
include_attachments,
include_comments,
include_labels,
content_format,
ocr_languages,
keep_markdown_format,
@@ -442,25 +428,17 @@ class ConfluenceLoader(BaseLoader):
yield from self._lazy_load()
def _search_content_by_cql(
self,
cql: str,
include_archived_spaces: Optional[bool] = None,
next_url: str = "",
**kwargs: Any,
) -> tuple[List[dict], str]:
if next_url:
response = self.confluence.get(next_url)
else:
url = "rest/api/content/search"
self, cql: str, include_archived_spaces: Optional[bool] = None, **kwargs: Any
) -> List[dict]:
url = "rest/api/content/search"
params: Dict[str, Any] = {"cql": cql}
params.update(kwargs)
if include_archived_spaces is not None:
params["includeArchivedSpaces"] = include_archived_spaces
params: Dict[str, Any] = {"cql": cql}
params.update(kwargs)
if include_archived_spaces is not None:
params["includeArchivedSpaces"] = include_archived_spaces
response = self.confluence.get(url, params=params)
return response.get("results", []), response.get("_links", {}).get("next", "")
response = self.confluence.get(url, params=params)
return response.get("results", [])
def paginate_request(self, retrieval_method: Callable, **kwargs: Any) -> List:
"""Paginate the various methods to retrieve groups of pages.
@@ -485,7 +463,6 @@ class ConfluenceLoader(BaseLoader):
max_pages = kwargs.pop("max_pages")
docs: List[dict] = []
next_url: str = ""
while len(docs) < max_pages:
get_pages = retry(
reraise=True,
@@ -499,15 +476,9 @@ class ConfluenceLoader(BaseLoader):
),
before_sleep=before_sleep_log(logger, logging.WARNING),
)(retrieval_method)
if self.cql: # cursor pagination for CQL
batch, next_url = get_pages(**kwargs, next_url=next_url)
if not next_url:
docs.extend(batch)
break
else:
batch = get_pages(**kwargs, start=len(docs))
if not batch:
break
batch = get_pages(**kwargs, start=len(docs))
if not batch:
break
docs.extend(batch)
return docs[:max_pages]
@@ -527,7 +498,6 @@ class ConfluenceLoader(BaseLoader):
include_restricted_content: bool,
include_attachments: bool,
include_comments: bool,
include_labels: bool,
content_format: ContentFormat,
ocr_languages: Optional[str] = None,
keep_markdown_format: Optional[bool] = False,
@@ -541,7 +511,6 @@ class ConfluenceLoader(BaseLoader):
page,
include_attachments,
include_comments,
include_labels,
content_format,
ocr_languages=ocr_languages,
keep_markdown_format=keep_markdown_format,
@@ -553,7 +522,6 @@ class ConfluenceLoader(BaseLoader):
page: dict,
include_attachments: bool,
include_comments: bool,
include_labels: bool,
content_format: ContentFormat,
ocr_languages: Optional[str] = None,
keep_markdown_format: Optional[bool] = False,
@@ -607,19 +575,10 @@ class ConfluenceLoader(BaseLoader):
]
text = text + "".join(comment_texts)
if include_labels:
labels = [
label["name"]
for label in page.get("metadata", {})
.get("labels", {})
.get("results", [])
]
metadata = {
"title": page["title"],
"id": page["id"],
"source": self.base_url.strip("/") + page["_links"]["webui"],
**({"labels": labels} if include_labels else {}),
}
if "version" in page and "when" in page["version"]:
@@ -709,11 +668,8 @@ class ConfluenceLoader(BaseLoader):
return text
for i, image in enumerate(images):
try:
image_text = pytesseract.image_to_string(image, lang=ocr_languages)
text += f"Page {i + 1}:\n{image_text}\n\n"
except pytesseract.TesseractError as ex:
logger.warning(f"TesseractError: {ex}")
image_text = pytesseract.image_to_string(image, lang=ocr_languages)
text += f"Page {i + 1}:\n{image_text}\n\n"
return text
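A hedged usage sketch for the label support threaded through the loader above; the URL, credentials, and space key are placeholders.

# Hedged usage sketch for ConfluenceLoader with labels.
loader = ConfluenceLoader(
    url="https://example.atlassian.net/wiki",  # placeholder
    username="me@example.com",  # placeholder
    api_key="...",  # placeholder
    space_key="DOCS",
    include_labels=True,  # adds metadata.labels to the expand clause
)
docs = loader.load()
print(docs[0].metadata.get("labels", []))  # e.g. ["l1", "l2"]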

View File

@@ -71,7 +71,7 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser):
yield d
def _generate_docs_single(self, result: Any) -> Iterator[Document]:
yield Document(page_content=result.content, metadata=result.as_dict())
yield Document(page_content=result.content, metadata={})
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
"""Lazily parse the blob."""

View File

@@ -145,9 +145,6 @@ if TYPE_CHECKING:
from langchain_community.embeddings.mlflow_gateway import (
MlflowAIGatewayEmbeddings,
)
from langchain_community.embeddings.model2vec import (
Model2vecEmbeddings,
)
from langchain_community.embeddings.modelscope_hub import (
ModelScopeEmbeddings,
)
@@ -292,7 +289,6 @@ __all__ = [
"MlflowAIGatewayEmbeddings",
"MlflowCohereEmbeddings",
"MlflowEmbeddings",
"Model2vecEmbeddings",
"ModelScopeEmbeddings",
"MosaicMLInstructorEmbeddings",
"NLPCloudEmbeddings",
@@ -376,7 +372,6 @@ _module_lookup = {
"MlflowAIGatewayEmbeddings": "langchain_community.embeddings.mlflow_gateway",
"MlflowCohereEmbeddings": "langchain_community.embeddings.mlflow",
"MlflowEmbeddings": "langchain_community.embeddings.mlflow",
"Model2vecEmbeddings": "langchain_community.embeddings.model2vec",
"ModelScopeEmbeddings": "langchain_community.embeddings.modelscope_hub",
"MosaicMLInstructorEmbeddings": "langchain_community.embeddings.mosaicml",
"NLPCloudEmbeddings": "langchain_community.embeddings.nlpcloud",

View File

@@ -1,6 +1,6 @@
import importlib
import importlib.metadata
from typing import Any, Dict, List, Literal, Optional, cast
from typing import Any, Dict, List, Literal, Optional
import numpy as np
from langchain_core.embeddings import Embeddings
@@ -117,7 +117,7 @@ class FastEmbedEmbeddings(BaseModel, Embeddings):
embeddings = self.model.embed(
texts, batch_size=self.batch_size, parallel=self.parallel
)
return [cast(List[float], e.tolist()) for e in embeddings]
return [e.tolist() for e in embeddings]
def embed_query(self, text: str) -> List[float]:
"""Generate query embeddings using FastEmbed.
@@ -133,4 +133,4 @@ class FastEmbedEmbeddings(BaseModel, Embeddings):
text, batch_size=self.batch_size, parallel=self.parallel
)
)
return cast(List[float], query_embeddings.tolist())
return query_embeddings.tolist()

View File

@@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional, cast
from typing import Any, Dict, List, Optional
import numpy as np
from langchain_core.embeddings import Embeddings
@@ -73,7 +73,7 @@ class LaserEmbeddings(BaseModel, Embeddings):
embeddings: np.ndarray
embeddings = self._encoder_pipeline.encode_sentences(texts)
return cast(List[List[float]], embeddings.tolist())
return embeddings.tolist()
def embed_query(self, text: str) -> List[float]:
"""Generate single query text embeddings using LASER.
@@ -86,4 +86,4 @@ class LaserEmbeddings(BaseModel, Embeddings):
"""
query_embeddings: np.ndarray
query_embeddings = self._encoder_pipeline.encode_sentences([text])
return cast(List[List[float]], query_embeddings.tolist())[0]
return query_embeddings.tolist()[0]
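The `cast` calls in these two hunks exist only to satisfy static type checkers: numpy's stubs type `ndarray.tolist()` loosely (as `Any`), so the declared return annotations need an explicit cast even though runtime behavior is identical. A minimal illustration:

# Why the cast: ndarray.tolist() is not typed as List[List[float]].
from typing import List, cast

import numpy as np

emb = np.random.rand(2, 3)
vectors: List[List[float]] = cast(List[List[float]], emb.tolist())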

View File

@@ -1,66 +0,0 @@
"""Wrapper around model2vec embedding models."""
from typing import List
from langchain_core.embeddings import Embeddings
class Model2vecEmbeddings(Embeddings):
"""model2v embedding models.
Install model2vec first, run 'pip install -U model2vec'.
The github repository for model2vec is : https://github.com/MinishLab/model2vec
Example:
.. code-block:: python
from langchain_community.embeddings import Model2vecEmbeddings
embedding = Model2vecEmbeddings("minishlab/potion-base-8M")
embedding.embed_documents([
"It's dangerous to go alone!",
"It's a secret to everybody.",
])
embedding.embed_query(
"Take this with you."
)
"""
def __init__(self, model: str):
"""Initialize embeddings.
Args:
model: Model name.
"""
try:
from model2vec import StaticModel
except ImportError as e:
raise ImportError(
"Unable to import model2vec, please install with "
"`pip install -U model2vec`."
) from e
self._model = StaticModel.from_pretrained(model)
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Embed documents using the model2vec embeddings model.
Args:
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text.
"""
return self._model.encode_as_sequence(texts)
def embed_query(self, text: str) -> List[float]:
"""Embed a query using the model2vec embeddings model.
Args:
text: The text to embed.
Returns:
Embeddings for the text.
"""
return self._model.encode(text)

View File

@@ -1,7 +1,4 @@
from hashlib import md5
from typing import Any, Dict, List, Tuple
from langchain_community.graphs.graph_document import GraphDocument, Relationship
from typing import Any, Dict, List
class KuzuGraph:
@@ -19,19 +16,7 @@ class KuzuGraph:
See https://python.langchain.com/docs/security for more information.
"""
def __init__(
self, db: Any, database: str = "kuzu", allow_dangerous_requests: bool = False
) -> None:
"""Initializes the Kùzu graph database connection."""
if allow_dangerous_requests is not True:
raise ValueError(
"The KuzuGraph class is a powerful tool that can be used to execute "
"arbitrary queries on the database. To enable this functionality, "
"set the `allow_dangerous_requests` parameter to `True` when "
"constructing the KuzuGraph object."
)
def __init__(self, db: Any, database: str = "kuzu") -> None:
try:
import kuzu
except ImportError:
@@ -72,7 +57,7 @@ class KuzuGraph:
if properties[property_name]["dimension"] > 0:
if "shape" in properties[property_name]:
for s in properties[property_name]["shape"]:
list_type_flag += f"[{s}]"
list_type_flag += "[%s]" % s
else:
for i in range(properties[property_name]["dimension"]):
list_type_flag += "[]"
@@ -86,7 +71,7 @@ class KuzuGraph:
rel_tables = self.conn._get_rel_table_names()
for table in rel_tables:
relationships.append(
f"(:{table['src']})-[:{table['name']}]->(:{table['dst']})"
"(:%s)-[:%s]->(:%s)" % (table["src"], table["name"], table["dst"])
)
rel_properties = []
@@ -108,154 +93,3 @@ class KuzuGraph:
f"Relationships properties: {rel_properties}\n"
f"Relationships: {relationships}\n"
)
def _create_chunk_node_table(self) -> None:
self.conn.execute(
"""
CREATE NODE TABLE IF NOT EXISTS Chunk (
id STRING,
text STRING,
type STRING,
PRIMARY KEY(id)
);
"""
)
def _create_entity_node_table(self, node_label: str) -> None:
self.conn.execute(
f"""
CREATE NODE TABLE IF NOT EXISTS {node_label} (
id STRING,
type STRING,
PRIMARY KEY(id)
);
"""
)
def _create_entity_relationship_table(self, rel: Relationship) -> None:
self.conn.execute(
f"""
CREATE REL TABLE IF NOT EXISTS {rel.type} (
FROM {rel.source.type} TO {rel.target.type}
);
"""
)
def add_graph_documents(
self,
graph_documents: List[GraphDocument],
allowed_relationships: List[Tuple[str, str, str]],
include_source: bool = False,
) -> None:
"""
Adds a list of `GraphDocument` objects that represent nodes and relationships
in a graph to a Kùzu backend.
Parameters:
- graph_documents (List[GraphDocument]): A list of `GraphDocument` objects
that contain the nodes and relationships to be added to the graph. Each
`GraphDocument` should encapsulate the structure of part of the graph,
including nodes, relationships, and the source document information.
- allowed_relationships (List[Tuple[str, str, str]]): A list of allowed
relationships that exist in the graph. Each tuple contains three elements:
the source node type, the relationship type, and the target node type.
Required for Kùzu, as the names of the relationship tables that need to
pre-exist are derived from these tuples.
- include_source (bool): If True, stores the source document
and links it to nodes in the graph using the `MENTIONS` relationship.
This is useful for tracing back the origin of data. Merges source
documents based on the `id` property from the source document metadata
if available; otherwise it calculates the MD5 hash of `page_content`
for the merging process. Defaults to False.
"""
# Get unique node labels in the graph documents
node_labels = list(
{node.type for document in graph_documents for node in document.nodes}
)
for document in graph_documents:
# Add chunk nodes and create source document relationships if include_source
# is True
if include_source:
self._create_chunk_node_table()
if not document.source.metadata.get("id"):
# Add a unique id to each document chunk via an md5 hash
document.source.metadata["id"] = md5(
document.source.page_content.encode("utf-8")
).hexdigest()
self.conn.execute(
f"""
MERGE (c:Chunk {{id: $id}})
SET c.text = $text,
c.type = "text_chunk"
""", # noqa: F541
parameters={
"id": document.source.metadata["id"],
"text": document.source.page_content,
},
)
for node_label in node_labels:
self._create_entity_node_table(node_label)
# Add entity nodes from data
for node in document.nodes:
self.conn.execute(
f"""
MERGE (e:{node.type} {{id: $id}})
SET e.type = "entity"
""",
parameters={"id": node.id},
)
if include_source:
# If include_source is True, we need to create a relationship table
# between the chunk nodes and the entity nodes
self._create_chunk_node_table()
ddl = "CREATE REL TABLE GROUP IF NOT EXISTS MENTIONS ("
table_names = []
for node_label in node_labels:
table_names.append(f"FROM Chunk TO {node_label}")
table_names = list(set(table_names))
ddl += ", ".join(table_names)
# Add common properties for all the tables here
ddl += ", label STRING, triplet_source_id STRING)"
if ddl:
self.conn.execute(ddl)
# Only allow relationships that exist in the schema
if node.type in node_labels:
self.conn.execute(
f"""
MATCH (c:Chunk {{id: $id}}),
(e:{node.type} {{id: $node_id}})
MERGE (c)-[m:MENTIONS]->(e)
SET m.triplet_source_id = $id
""",
parameters={
"id": document.source.metadata["id"],
"node_id": node.id,
},
)
# Add entity relationships
for rel in document.relationships:
self._create_entity_relationship_table(rel)
# Create relationship
source_label = rel.source.type
source_id = rel.source.id
target_label = rel.target.type
target_id = rel.target.id
self.conn.execute(
f"""
MATCH (e1:{source_label} {{id: $source_id}}),
(e2:{target_label} {{id: $target_id}})
MERGE (e1)-[:{rel.type}]->(e2)
""",
parameters={
"source_id": source_id,
"target_id": target_id,
},
)
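For orientation, a hedged end-to-end sketch of the importer above; the database path, entities, and text are invented.

# Hedged usage sketch for KuzuGraph.add_graph_documents.
import kuzu
from langchain_core.documents import Document
from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship

db = kuzu.Database("./kuzu_db")  # illustrative path
graph = KuzuGraph(db, allow_dangerous_requests=True)
alice = Node(id="Alice", type="Person")
acme = Node(id="Acme", type="Company")
doc = GraphDocument(
    nodes=[alice, acme],
    relationships=[Relationship(source=alice, target=acme, type="WORKS_AT")],
    source=Document(page_content="Alice works at Acme."),
)
graph.add_graph_documents(
    [doc],
    allowed_relationships=[("Person", "WORKS_AT", "Company")],
    include_source=True,
)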

View File

@@ -1,272 +1,15 @@
import logging
from hashlib import md5
from typing import Any, Dict, List, Optional
from langchain_core.utils import get_from_dict_or_env
from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship
from langchain_community.graphs.graph_store import GraphStore
logger = logging.getLogger(__name__)
BASE_ENTITY_LABEL = "__Entity__"
from langchain_community.graphs.neo4j_graph import Neo4jGraph
SCHEMA_QUERY = """
SHOW SCHEMA INFO
"""
NODE_PROPERTIES_QUERY = """
CALL schema.node_type_properties()
YIELD nodeType AS label, propertyName AS property, propertyTypes AS type
WITH label AS nodeLabels, collect({key: property, types: type}) AS properties
RETURN {labels: nodeLabels, properties: properties} AS output
"""
REL_QUERY = """
MATCH (n)-[e]->(m)
WITH DISTINCT
labels(n) AS start_node_labels,
type(e) AS rel_type,
labels(m) AS end_node_labels,
e,
keys(e) AS properties
UNWIND CASE WHEN size(properties) > 0 THEN properties ELSE [null] END AS prop
WITH
start_node_labels,
rel_type,
end_node_labels,
CASE WHEN prop IS NULL THEN [] ELSE [prop, valueType(e[prop])] END AS property_info
RETURN
start_node_labels,
rel_type,
end_node_labels,
COLLECT(DISTINCT CASE
WHEN property_info <> []
THEN property_info
ELSE null END) AS properties_info
"""
NODE_IMPORT_QUERY = """
UNWIND $data AS row
CALL merge.node(row.label, row.properties, {}, {})
YIELD node
RETURN distinct 'done' AS result
"""
REL_NODES_IMPORT_QUERY = """
UNWIND $data AS row
MERGE (source {id: row.source_id})
MERGE (target {id: row.target_id})
RETURN distinct 'done' AS result
"""
REL_IMPORT_QUERY = """
UNWIND $data AS row
MATCH (source {id: row.source_id})
MATCH (target {id: row.target_id})
WITH source, target, row
CALL merge.relationship(source, row.type, {}, {}, target, {})
YIELD rel
RETURN distinct 'done' AS result
"""
INCLUDE_DOCS_QUERY = """
MERGE (d:Document {id:$document.metadata.id})
SET d.content = $document.page_content
SET d += $document.metadata
RETURN distinct 'done' AS result
"""
INCLUDE_DOCS_SOURCE_QUERY = """
UNWIND $data AS row
MATCH (source {id: row.source_id}), (d:Document {id: $document.metadata.id})
MERGE (d)-[:MENTIONS]->(source)
RETURN distinct 'done' AS result
"""
NODE_PROPS_TEXT = """
Node labels and properties (name and type) are:
"""
REL_PROPS_TEXT = """
Relationship labels and properties are:
"""
REL_TEXT = """
Nodes are connected with the following relationships:
CALL llm_util.schema("raw")
YIELD *
RETURN *
"""
def get_schema_subset(data: Dict[str, Any]) -> Dict[str, Any]:
return {
"edges": [
{
"end_node_labels": edge["end_node_labels"],
"properties": [
{
"key": prop["key"],
"types": [
{"type": type_item["type"].lower()}
for type_item in prop["types"]
],
}
for prop in edge["properties"]
],
"start_node_labels": edge["start_node_labels"],
"type": edge["type"],
}
for edge in data["edges"]
],
"nodes": [
{
"labels": node["labels"],
"properties": [
{
"key": prop["key"],
"types": [
{"type": type_item["type"].lower()}
for type_item in prop["types"]
],
}
for prop in node["properties"]
],
}
for node in data["nodes"]
],
}
def get_reformated_schema(
nodes: List[Dict[str, Any]], rels: List[Dict[str, Any]]
) -> Dict[str, Any]:
return {
"edges": [
{
"end_node_labels": rel["end_node_labels"],
"properties": [
{"key": prop[0], "types": [{"type": prop[1].lower()}]}
for prop in rel["properties_info"]
],
"start_node_labels": rel["start_node_labels"],
"type": rel["rel_type"],
}
for rel in rels
],
"nodes": [
{
"labels": [_remove_backticks(node["labels"])[1:]],
"properties": [
{
"key": prop["key"],
"types": [
{"type": type_item.lower()} for type_item in prop["types"]
],
}
for prop in node["properties"]
if node["properties"][0]["key"] != ""
],
}
for node in nodes
],
}
def transform_schema_to_text(schema: Dict[str, Any]) -> str:
node_props_data = ""
rel_props_data = ""
rel_data = ""
for node in schema["nodes"]:
node_props_data += f"- labels: (:{':'.join(node['labels'])})\n"
if node["properties"] == []:
continue
node_props_data += " properties:\n"
for prop in node["properties"]:
prop_types_str = " or ".join(
{prop_types["type"] for prop_types in prop["types"]}
)
node_props_data += f" - {prop['key']}: {prop_types_str}\n"
for rel in schema["edges"]:
rel_type = rel["type"]
start_labels = ":".join(rel["start_node_labels"])
end_labels = ":".join(rel["end_node_labels"])
rel_data += f"(:{start_labels})-[:{rel_type}]->(:{end_labels})\n"
if rel["properties"] == []:
continue
rel_props_data += f"- labels: {rel_type}\n properties:\n"
for prop in rel["properties"]:
prop_types_str = " or ".join(
{prop_types["type"].lower() for prop_types in prop["types"]}
)
rel_props_data += f" - {prop['key']}: {prop_types_str}\n"
return "".join(
[
NODE_PROPS_TEXT + node_props_data if node_props_data else "",
REL_PROPS_TEXT + rel_props_data if rel_props_data else "",
REL_TEXT + rel_data if rel_data else "",
]
)
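To see what `transform_schema_to_text` produces, here is a small hand-built input following the shape emitted by `get_schema_subset`; the labels and property names are invented.

# Illustrative input/output for transform_schema_to_text.
schema = {
    "nodes": [
        {
            "labels": ["Person"],
            "properties": [{"key": "name", "types": [{"type": "string"}]}],
        }
    ],
    "edges": [
        {
            "start_node_labels": ["Person"],
            "type": "KNOWS",
            "end_node_labels": ["Person"],
            "properties": [],
        }
    ],
}
print(transform_schema_to_text(schema))
# Lists the Person label with its name property, then the pattern
# (:Person)-[:KNOWS]->(:Person)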
def _remove_backticks(text: str) -> str:
return text.replace("`", "")
def _transform_nodes(nodes: list[Node], baseEntityLabel: bool) -> List[dict]:
transformed_nodes = []
for node in nodes:
properties_dict = node.properties | {"id": node.id}
label = (
[_remove_backticks(node.type), BASE_ENTITY_LABEL]
if baseEntityLabel
else [_remove_backticks(node.type)]
)
node_dict = {"label": label, "properties": properties_dict}
transformed_nodes.append(node_dict)
return transformed_nodes
def _transform_relationships(
relationships: list[Relationship], baseEntityLabel: bool
) -> List[dict]:
transformed_relationships = []
for rel in relationships:
rel_dict = {
"type": _remove_backticks(rel.type),
"source_label": (
[BASE_ENTITY_LABEL]
if baseEntityLabel
else [_remove_backticks(rel.source.type)]
),
"source_id": rel.source.id,
"target_label": (
[BASE_ENTITY_LABEL]
if baseEntityLabel
else [_remove_backticks(rel.target.type)]
),
"target_id": rel.target.id,
}
transformed_relationships.append(rel_dict)
return transformed_relationships
class MemgraphGraph(GraphStore):
class MemgraphGraph(Neo4jGraph):
"""Memgraph wrapper for graph operations.
Parameters:
url (Optional[str]): The URL of the Memgraph database server.
username (Optional[str]): The username for database authentication.
password (Optional[str]): The password for database authentication.
database (str): The name of the database to connect to. Default is 'memgraph'.
refresh_schema (bool): A flag whether to refresh schema information
at initialization. Default is True.
driver_config (Dict): Configuration passed to Neo4j Driver.
*Security note*: Make sure that the database connection uses credentials
that are narrowly-scoped to only include necessary permissions.
Failure to do so may result in data corruption or loss, since the calling
@@ -280,247 +23,49 @@ class MemgraphGraph(GraphStore):
"""
def __init__(
self,
url: Optional[str] = None,
username: Optional[str] = None,
password: Optional[str] = None,
database: Optional[str] = None,
refresh_schema: bool = True,
*,
driver_config: Optional[Dict] = None,
self, url: str, username: str, password: str, *, database: str = "memgraph"
) -> None:
"""Create a new Memgraph graph wrapper instance."""
try:
import neo4j
except ImportError:
raise ImportError(
"Could not import neo4j python package. "
"Please install it with `pip install neo4j`."
)
url = get_from_dict_or_env({"url": url}, "url", "MEMGRAPH_URI")
# if username and password are "", assume auth is disabled
if username == "" and password == "":
auth = None
else:
username = get_from_dict_or_env(
{"username": username},
"username",
"MEMGRAPH_USERNAME",
)
password = get_from_dict_or_env(
{"password": password},
"password",
"MEMGRAPH_PASSWORD",
)
auth = (username, password)
database = get_from_dict_or_env(
{"database": database}, "database", "MEMGRAPH_DATABASE", "memgraph"
)
self._driver = neo4j.GraphDatabase.driver(
url, auth=auth, **(driver_config or {})
)
self._database = database
self.schema: str = ""
self.structured_schema: Dict[str, Any] = {}
# Verify connection
try:
self._driver.verify_connectivity()
except neo4j.exceptions.ServiceUnavailable:
raise ValueError(
"Could not connect to Memgraph database. "
"Please ensure that the url is correct"
)
except neo4j.exceptions.AuthError:
raise ValueError(
"Could not connect to Memgraph database. "
"Please ensure that the username and password are correct"
)
# Set schema
if refresh_schema:
try:
self.refresh_schema()
except neo4j.exceptions.ClientError as e:
raise e
def close(self) -> None:
if self._driver:
logger.info("Closing the driver connection.")
self._driver.close()
self._driver = None
@property
def get_schema(self) -> str:
"""Returns the schema of the Graph database"""
return self.schema
@property
def get_structured_schema(self) -> Dict[str, Any]:
"""Returns the structured schema of the Graph database"""
return self.structured_schema
def query(self, query: str, params: dict = {}) -> List[Dict[str, Any]]:
"""Query the graph.
Args:
query (str): The Cypher query to execute.
params (dict): The parameters to pass to the query.
Returns:
List[Dict[str, Any]]: The list of dictionaries containing the query results.
"""
from neo4j.exceptions import Neo4jError
try:
data, _, _ = self._driver.execute_query(
query,
database_=self._database,
parameters_=params,
)
json_data = [r.data() for r in data]
return json_data
except Neo4jError as e:
if not (
(
( # isCallInTransactionError
e.code == "Neo.DatabaseError.Statement.ExecutionFailed"
or e.code
== "Neo.DatabaseError.Transaction.TransactionStartFailed"
)
and "in an implicit transaction" in e.message
)
or ( # isPeriodicCommitError
e.code == "Neo.ClientError.Statement.SemanticError"
and (
"in an open transaction is not possible" in e.message
or "tried to execute in an explicit transaction" in e.message
)
)
or (
e.code == "Memgraph.ClientError.MemgraphError.MemgraphError"
and ("in multicommand transactions" in e.message)
)
or (
e.code == "Memgraph.ClientError.MemgraphError.MemgraphError"
and "SchemaInfo disabled" in e.message
)
):
raise
# fallback to allow implicit transactions
with self._driver.session(database=self._database) as session:
data = session.run(query, params)
json_data = [r.data() for r in data]
return json_data
super().__init__(url, username, password, database=database)
def refresh_schema(self) -> None:
"""
Refreshes the Memgraph graph schema information.
"""
import ast
from neo4j.exceptions import Neo4jError
db_structured_schema = self.query(SCHEMA_QUERY)[0].get("schema")
assert db_structured_schema is not None
self.structured_schema = db_structured_schema
# leave schema empty if db is empty
if self.query("MATCH (n) RETURN n LIMIT 1") == []:
return
# Format node properties
formatted_node_props = []
# first try with SHOW SCHEMA INFO
try:
result = self.query(SCHEMA_QUERY)[0].get("schema")
if result is not None and isinstance(result, (str, ast.AST)):
schema_result = ast.literal_eval(result)
else:
schema_result = result
assert schema_result is not None
structured_schema = get_schema_subset(schema_result)
self.structured_schema = structured_schema
self.schema = transform_schema_to_text(structured_schema)
return
except Neo4jError as e:
if (
e.code == "Memgraph.ClientError.MemgraphError.MemgraphError"
and "SchemaInfo disabled" in e.message
):
logger.info(
"Schema generation with SHOW SCHEMA INFO query failed. "
"Set --schema-info-enabled=true to use SHOW SCHEMA INFO query. "
"Falling back to alternative queries."
)
# fallback on Cypher without SHOW SCHEMA INFO
nodes = [query["output"] for query in self.query(NODE_PROPERTIES_QUERY)]
rels = self.query(REL_QUERY)
structured_schema = get_reformated_schema(nodes, rels)
self.structured_schema = structured_schema
self.schema = transform_schema_to_text(structured_schema)
def add_graph_documents(
self,
graph_documents: List[GraphDocument],
include_source: bool = False,
baseEntityLabel: bool = False,
) -> None:
"""
Takes GraphDocument as input and uses it to construct a graph in Memgraph.
Parameters:
- graph_documents (List[GraphDocument]): A list of GraphDocument objects
that contain the nodes and relationships to be added to the graph. Each
GraphDocument should encapsulate the structure of part of the graph,
including nodes, relationships, and the source document information.
- include_source (bool, optional): If True, stores the source document
and links it to nodes in the graph using the MENTIONS relationship.
This is useful for tracing back the origin of data. Merges source
documents based on the `id` property from the source document metadata
if available; otherwise it calculates the MD5 hash of `page_content`
for the merging process. Defaults to False.
- baseEntityLabel (bool, optional): If True, each newly created node
gets a secondary __Entity__ label, which is indexed and improves import
speed and performance. Defaults to False.
"""
if baseEntityLabel:
self.query(
f"CREATE CONSTRAINT ON (b:{BASE_ENTITY_LABEL}) "
"ASSERT b.id IS UNIQUE;"
)
self.query(f"CREATE INDEX ON :{BASE_ENTITY_LABEL}(id);")
self.query(f"CREATE INDEX ON :{BASE_ENTITY_LABEL};")
for document in graph_documents:
if include_source:
if not document.source.metadata.get("id"):
document.source.metadata["id"] = md5(
document.source.page_content.encode("utf-8")
).hexdigest()
self.query(INCLUDE_DOCS_QUERY, {"document": document.source.__dict__})
self.query(
NODE_IMPORT_QUERY,
{"data": _transform_nodes(document.nodes, baseEntityLabel)},
for node_name, properties in db_structured_schema["node_props"].items():
formatted_node_props.append(
f"Node name: '{node_name}', Node properties: {properties}"
)
rel_data = _transform_relationships(document.relationships, baseEntityLabel)
self.query(
REL_NODES_IMPORT_QUERY,
{"data": rel_data},
)
self.query(
REL_IMPORT_QUERY,
{"data": rel_data},
# Format relationship properties
formatted_rel_props = []
for rel_name, properties in db_structured_schema["rel_props"].items():
formatted_rel_props.append(
f"Relationship name: '{rel_name}', "
f"Relationship properties: {properties}"
)
if include_source:
self.query(
INCLUDE_DOCS_SOURCE_QUERY,
{"data": rel_data, "document": document.source.__dict__},
)
self.refresh_schema()
# Format relationships
formatted_rels = [
f"(:{rel['start']})-[:{rel['type']}]->(:{rel['end']})"
for rel in db_structured_schema["relationships"]
]
self.schema = "\n".join(
[
"Node properties are the following:",
*formatted_node_props,
"Relationship properties are the following:",
*formatted_rel_props,
"The relationships are the following:",
*formatted_rels,
]
)

View File

@@ -89,7 +89,7 @@ def get_gmail_credentials(
flow = InstalledAppFlow.from_client_secrets_file(
client_secrets_file, scopes
)
creds = flow.run_local_server(port=0, open_browser=False)
creds = flow.run_local_server(port=0)
# Save the credentials for the next run
with open(token_file, "w") as token:
token.write(creds.to_json())
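A hedged usage sketch for the credential helper above; the file paths are conventional defaults, not values from this diff.

# Hedged usage sketch for get_gmail_credentials.
creds = get_gmail_credentials(
    token_file="token.json",  # illustrative path
    client_secrets_file="credentials.json",  # illustrative path
    scopes=["https://mail.google.com/"],
)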

View File

@@ -2,7 +2,7 @@ from __future__ import annotations
import logging
import re
from typing import TYPE_CHECKING, Any, List, Optional, Pattern, cast
from typing import TYPE_CHECKING, Any, List, Optional, Pattern
from urllib.parse import urlparse
import numpy as np
@@ -18,7 +18,7 @@ def _array_to_buffer(array: List[float], dtype: Any = np.float32) -> bytes:
def _buffer_to_array(buffer: bytes, dtype: Any = np.float32) -> List[float]:
return cast(List[float], np.frombuffer(buffer, dtype=dtype).tolist())
return np.frombuffer(buffer, dtype=dtype).tolist()
class TokenEscaper:

View File

@@ -1545,9 +1545,10 @@ class AzureSearch(VectorStore):
"""Return AzureSearchVectorStoreRetriever initialized from this VectorStore.
Args:
search_type (Optional[str]): Overrides the type of search that
the Retriever should perform. Defaults to `self.search_type`.
Can be "similarity", "hybrid", or "semantic_hybrid".
search_type (Optional[str]): Defines the type of search that
the Retriever should perform.
Can be "similarity" (default), "hybrid", or
"semantic_hybrid".
search_kwargs (Optional[Dict]): Keyword arguments to pass to the
search function. Can include things like:
score_threshold: Minimum relevance threshold
@@ -1560,9 +1561,6 @@ class AzureSearch(VectorStore):
Returns:
AzureSearchVectorStoreRetriever: Retriever class for VectorStore.
"""
search_type = kwargs.get("search_type", self.search_type)
kwargs["search_type"] = search_type
tags = kwargs.pop("tags", None) or []
tags.extend(self._get_retriever_tags())
return AzureSearchVectorStoreRetriever(vectorstore=self, **kwargs, tags=tags)
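A hedged example of overriding the search type per retriever, per the docstring above; the vector store instance and query are assumed.

# Hedged usage sketch for AzureSearch.as_retriever.
retriever = vector_store.as_retriever(  # vector_store: an AzureSearch instance
    search_type="hybrid",
    search_kwargs={"k": 4, "score_threshold": 0.5},
)
docs = retriever.invoke("quarterly revenue guidance")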

View File

@@ -1,4 +1,4 @@
from typing import Any, Iterable, List, Optional, Tuple, cast
from typing import Any, Iterable, List, Optional, Tuple
from uuid import uuid4
import numpy as np
@@ -111,7 +111,7 @@ class SemaDB(VectorStore):
embed_matrix = embed_matrix / np.linalg.norm(
embed_matrix, axis=1, keepdims=True
)
embeddings = cast(List[List[float]], embed_matrix.tolist())
embeddings = embed_matrix.tolist()
# Create points
ids: List[str] = []
points = []
@@ -186,7 +186,7 @@ class SemaDB(VectorStore):
if self.distance_strategy == DistanceStrategy.COSINE:
vec = np.array(embedding)
vec = vec / np.linalg.norm(vec)
embedding = cast(List[float], vec.tolist())
embedding = vec.tolist()
# Perform search request
payload = {
"vector": embedding,

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
import numpy as np
from langchain_core.documents import Document
@@ -75,7 +75,7 @@ class USearch(VectorStore):
self.index.add(np.array(ids), np.array(embeddings))
self.docstore.add(dict(zip(ids, documents)))
self.ids.extend(ids)
return cast(List[str], ids.tolist())
return ids.tolist()
def similarity_search_with_score(
self,
@@ -171,4 +171,4 @@ class USearch(VectorStore):
usearch = guard_import("usearch.index")
index = usearch.Index(ndim=len(embeddings[0]), metric=metric)
index.add(np.array(ids), np.array(embeddings))
return cls(embedding, index, docstore, cast(List[str], ids.tolist()))
return cls(embedding, index, docstore, ids.tolist())

File diff suppressed because it is too large

View File

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "langchain-community"
version = "0.3.10"
version = "0.3.9"
description = "Community contributed LangChain integrations."
authors = []
license = "MIT"
@@ -30,8 +30,8 @@ ignore-words-list = "momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogy
[tool.poetry.dependencies]
python = ">=3.9,<4.0"
langchain-core = "^0.3.22"
langchain = "^0.3.10"
langchain-core = "^0.3.21"
langchain = "^0.3.8"
SQLAlchemy = ">=1.4,<3"
requests = "^2"
PyYAML = ">=5.3"
@@ -39,7 +39,7 @@ aiohttp = "^3.8.3"
tenacity = ">=8.1.0,!=8.4.0,<10"
dataclasses-json = ">= 0.5.7, < 0.7"
pydantic-settings = "^2.4.0"
langsmith = ">=0.1.125,<0.3"
langsmith = "^0.1.125"
httpx-sse = "^0.4.0"
[[tool.poetry.dependencies.numpy]]
version = ">=1.22.4,<2"

View File

@@ -1,44 +1,24 @@
import os
from langchain_core.documents import Document
from langchain_community.graphs import MemgraphGraph
from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship
from langchain_community.graphs.memgraph_graph import NODE_PROPERTIES_QUERY, REL_QUERY
test_data = [
GraphDocument(
nodes=[Node(id="foo", type="foo"), Node(id="bar", type="bar")],
relationships=[
Relationship(
source=Node(id="foo", type="foo"),
target=Node(id="bar", type="bar"),
type="REL",
)
],
source=Document(page_content="source document"),
)
]
def test_cypher_return_correct_schema() -> None:
"""Test that chain returns direct results."""
url = os.environ.get("MEMGRAPH_URI", "bolt://localhost:7687")
username = os.environ.get("MEMGRAPH_USERNAME", "")
password = os.environ.get("MEMGRAPH_PASSWORD", "")
assert url is not None
assert username is not None
assert password is not None
graph = MemgraphGraph(url=url, username=username, password=password)
# Drop graph
graph.query("STORAGE MODE IN_MEMORY_ANALYTICAL")
graph.query("DROP GRAPH")
graph.query("STORAGE MODE IN_MEMORY_TRANSACTIONAL")
graph = MemgraphGraph(
url=url,
username=username,
password=password,
)
# Delete all nodes in the graph
graph.query("MATCH (n) DETACH DELETE n")
# Create two nodes and a relationship
graph.query(
"""
@@ -51,123 +31,32 @@ def test_cypher_return_correct_schema() -> None:
)
# Refresh schema information
graph.refresh_schema()
relationships = graph.query(
"CALL llm_util.schema('raw') YIELD schema "
"WITH schema.relationships AS relationships "
"UNWIND relationships AS relationship "
"RETURN relationship['start'] AS start, "
"relationship['type'] AS type, "
"relationship['end'] AS end "
"ORDER BY start, type, end;"
)
node_properties = graph.query(NODE_PROPERTIES_QUERY)
relationships = graph.query(REL_QUERY)
expected_node_properties = [
{
"output": {
"labels": ":`LabelA`",
"properties": [{"key": "property_a", "types": ["String"]}],
}
},
{"output": {"labels": ":`LabelB`", "properties": [{"key": "", "types": []}]}},
{"output": {"labels": ":`LabelC`", "properties": [{"key": "", "types": []}]}},
]
node_props = graph.query(
"CALL llm_util.schema('raw') YIELD schema "
"WITH schema.node_props AS nodes "
"WITH nodes['LabelA'] AS properties "
"UNWIND properties AS property "
"RETURN property['property'] AS prop, "
"property['type'] AS type "
"ORDER BY prop ASC;"
)
expected_relationships = [
{
"start_node_labels": ["LabelA"],
"rel_type": "REL_TYPE",
"end_node_labels": ["LabelC"],
"properties_info": [["rel_prop", "STRING"]],
},
{
"start_node_labels": ["LabelA"],
"rel_type": "REL_TYPE",
"end_node_labels": ["LabelB"],
"properties_info": [],
},
{"start": "LabelA", "type": "REL_TYPE", "end": "LabelB"},
{"start": "LabelA", "type": "REL_TYPE", "end": "LabelC"},
]
graph.close()
expected_node_props = [{"prop": "property_a", "type": "str"}]
assert node_properties == expected_node_properties
assert relationships == expected_relationships
def test_add_graph_documents() -> None:
"""Test that Memgraph correctly imports graph document."""
url = os.environ.get("MEMGRAPH_URI", "bolt://localhost:7687")
username = os.environ.get("MEMGRAPH_USERNAME", "")
password = os.environ.get("MEMGRAPH_PASSWORD", "")
assert url is not None
assert username is not None
assert password is not None
graph = MemgraphGraph(
url=url, username=username, password=password, refresh_schema=False
)
# Drop graph
graph.query("STORAGE MODE IN_MEMORY_ANALYTICAL")
graph.query("DROP GRAPH")
graph.query("STORAGE MODE IN_MEMORY_TRANSACTIONAL")
# Create KG
graph.add_graph_documents(test_data)
output = graph.query("MATCH (n) RETURN labels(n) AS label, count(*) AS count")
# Close the connection
graph.close()
assert output == [{"label": ["bar"], "count": 1}, {"label": ["foo"], "count": 1}]
def test_add_graph_documents_base_entity() -> None:
"""Test that Memgraph correctly imports graph document with Entity label."""
url = os.environ.get("MEMGRAPH_URI", "bolt://localhost:7687")
username = os.environ.get("MEMGRAPH_USERNAME", "")
password = os.environ.get("MEMGRAPH_PASSWORD", "")
assert url is not None
assert username is not None
assert password is not None
graph = MemgraphGraph(
url=url, username=username, password=password, refresh_schema=False
)
# Drop graph
graph.query("STORAGE MODE IN_MEMORY_ANALYTICAL")
graph.query("DROP GRAPH")
graph.query("STORAGE MODE IN_MEMORY_TRANSACTIONAL")
# Create KG
graph.add_graph_documents(test_data, baseEntityLabel=True)
output = graph.query("MATCH (n) RETURN labels(n) AS label, count(*) AS count")
# Close the connection
graph.close()
assert output == [
{"label": ["__Entity__", "bar"], "count": 1},
{"label": ["__Entity__", "foo"], "count": 1},
]
def test_add_graph_documents_include_source() -> None:
"""Test that Memgraph correctly imports graph document with source included."""
url = os.environ.get("MEMGRAPH_URI", "bolt://localhost:7687")
username = os.environ.get("MEMGRAPH_USERNAME", "")
password = os.environ.get("MEMGRAPH_PASSWORD", "")
assert url is not None
assert username is not None
assert password is not None
graph = MemgraphGraph(
url=url, username=username, password=password, refresh_schema=False
)
# Drop graph
graph.query("STORAGE MODE IN_MEMORY_ANALYTICAL")
graph.query("DROP GRAPH")
graph.query("STORAGE MODE IN_MEMORY_TRANSACTIONAL")
# Create KG
graph.add_graph_documents(test_data, include_source=True)
output = graph.query("MATCH (n) RETURN labels(n) AS label, count(*) AS count")
# Close the connection
graph.close()
assert output == [
{"label": ["bar"], "count": 1},
{"label": ["foo"], "count": 1},
{"label": ["Document"], "count": 1},
]
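
`include_source=True` imports the originating `Document` as its own node, which accounts for the third row in the assertion above. In the analogous Neo4j integration the source node is linked to extracted entities via a `MENTIONS` relationship; assuming Memgraph mirrors that behavior, a follow-up check could look like:

```python
# Assumption: the importer links the source Document to entities via
# MENTIONS and stores the page content on a `text` property.
rows = graph.query(
    "MATCH (d:Document)-[:MENTIONS]->(e) RETURN d.text AS text, e.id AS entity"
)
```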

View File

@@ -3,15 +3,27 @@
import uuid
import pytest
from langchain_tests.integration_tests.vectorstores import VectorStoreIntegrationTests
from langchain_tests.integration_tests.vectorstores import (
AsyncReadWriteTestSuite,
ReadWriteTestSuite,
)
from langchain_community.vectorstores import ApertureDB
class TestApertureStandard(VectorStoreIntegrationTests):
class TestApertureDBReadWriteTestSuite(ReadWriteTestSuite):
@pytest.fixture
def vectorstore(self) -> ApertureDB:
descriptor_set = uuid.uuid4().hex # Fresh descriptor set for each test
return ApertureDB(
embeddings=self.get_embeddings(), descriptor_set=descriptor_set
)
class TestAsyncApertureDBReadWriteTestSuite(AsyncReadWriteTestSuite):
@pytest.fixture
async def vectorstore(self) -> ApertureDB:
descriptor_set = uuid.uuid4().hex # Fresh descriptor set for each test
return ApertureDB(
embeddings=self.get_embeddings(), descriptor_set=descriptor_set
)
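
Both suites exercise the same add/get/delete round-trip contract against whatever store the `vectorstore` fixture returns; the fresh `descriptor_set` per test keeps ApertureDB runs isolated. The pattern carries over to any store, e.g. a minimal sketch against `langchain_core`'s in-memory reference implementation:

```python
import pytest
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_tests.integration_tests.vectorstores import ReadWriteTestSuite


class TestInMemoryReadWrite(ReadWriteTestSuite):
    @pytest.fixture
    def vectorstore(self) -> InMemoryVectorStore:
        # Nothing persists between tests, so no per-test namespace is needed.
        return InMemoryVectorStore(embedding=self.get_embeddings())
```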

View File

@@ -3,7 +3,7 @@
import math
import os
import tempfile
from typing import List, cast
from typing import List
import numpy as np
import pytest
@@ -60,13 +60,13 @@ class RandomEmbeddings(Embeddings):
"""Fake embeddings with random vectors. For testing purposes."""
def embed_documents(self, texts: List[str]) -> List[List[float]]:
return [cast(list[float], np.random.rand(100).tolist()) for _ in texts]
return [np.random.rand(100).tolist() for _ in texts]
def embed_query(self, text: str) -> List[float]:
return cast(list[float], np.random.rand(100).tolist())
return np.random.rand(100).tolist()
def embed_image(self, uris: List[str]) -> List[List[float]]:
return [cast(list[float], np.random.rand(100).tolist()) for _ in uris]
return [np.random.rand(100).tolist() for _ in uris]
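
The dropped `cast(...)` wrappers existed only to appease type checkers (older numpy stubs type `ndarray.tolist()` loosely); runtime behavior is identical. When a test needs reproducible rather than random vectors, a deterministic fake is a common alternative; a sketch, not part of this diff:

```python
from typing import List

import numpy as np
from langchain_core.embeddings import Embeddings


class DeterministicEmbeddings(Embeddings):
    """Fake embeddings seeded by the text itself, so vectors are stable across runs."""

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self.embed_query(t) for t in texts]

    def embed_query(self, text: str) -> List[float]:
        # Seed from the character sum so identical text maps to an identical vector.
        rng = np.random.default_rng(sum(map(ord, text)) % (2**32))
        return rng.random(100).tolist()
```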
class IncrementalEmbeddings(Embeddings):

View File

@@ -195,36 +195,6 @@ class TestConfluenceLoader:
assert mock_confluence.cql.call_count == 0
assert mock_confluence.get_page_child_by_type.call_count == 0
@pytest.mark.requires("markdownify")
def test_confluence_loader_when_include_labels_set_to_true(
self, mock_confluence: MagicMock
) -> None:
# one response with two pages
mock_confluence.get_all_pages_from_space.return_value = [
self._get_mock_page("123", include_labels=True),
self._get_mock_page("456", include_labels=False),
]
mock_confluence.get_all_restrictions_for_content.side_effect = [
self._get_mock_page_restrictions("123"),
self._get_mock_page_restrictions("456"),
]
confluence_loader = self._get_mock_confluence_loader(
mock_confluence,
space_key=self.MOCK_SPACE_KEY,
include_labels=True,
max_pages=2,
)
documents = confluence_loader.load()
assert mock_confluence.get_all_pages_from_space.call_count == 1
assert len(documents) == 2
assert all(isinstance(doc, Document) for doc in documents)
assert documents[0].metadata["labels"] == ["l1", "l2"]
assert documents[1].metadata["labels"] == []
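
The deleted test pinned down the metadata contract: Confluence nests labels under `metadata.labels.results`, and the loader flattens them to a plain list of names (`["l1", "l2"]`). A hedged sketch of that flattening (the helper name is hypothetical):

```python
from typing import Dict, List


def _extract_label_names(page: Dict) -> List[str]:
    # Mirrors the behavior asserted above: pull name fields out of
    # Confluence's nested metadata.labels.results payload.
    results = page.get("metadata", {}).get("labels", {}).get("results", [])
    return [label["name"] for label in results]
```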
def _get_mock_confluence_loader(
self, mock_confluence: MagicMock, **kwargs: Any
) -> ConfluenceLoader:
@@ -238,10 +208,7 @@ class TestConfluenceLoader:
return confluence_loader
def _get_mock_page(
self,
page_id: str,
content_format: ContentFormat = ContentFormat.STORAGE,
include_labels: bool = False,
self, page_id: str, content_format: ContentFormat = ContentFormat.STORAGE
) -> Dict:
return {
"id": f"{page_id}",
@@ -249,20 +216,6 @@ class TestConfluenceLoader:
"body": {
f"{content_format.name.lower()}": {"value": f"<p>Content {page_id}</p>"}
},
**(
    {
        "metadata": {
            "labels": {
                "results": [
                    {"prefix": "global", "name": "l1", "id": "111"},
                    {"prefix": "global", "name": "l2", "id": "222"},
                ]
            }
        }
    }
    if include_labels
    else {}
),
"status": "current",
"type": "page",
"_links": {

Some files were not shown because too many files have changed in this diff.